Re: git: 011e3d0b8b90 - main - cxgbe(4): Perform Conventional Reset instead of FLR on the device.

From: Konstantin Belousov <kostikbel_at_gmail.com>
Date: Thu, 03 Apr 2025 00:52:03 UTC
On Sun, Mar 16, 2025 at 02:35:18AM +0000, Navdeep Parhar wrote:
> The branch main has been updated by np:
> 
> URL: https://cgit.FreeBSD.org/src/commit/?id=011e3d0b8b90a4330f14b2cb7da45ed7b805ed10
> 
> commit 011e3d0b8b90a4330f14b2cb7da45ed7b805ed10
> Author:     Navdeep Parhar <np@FreeBSD.org>
> AuthorDate: 2024-12-07 08:00:49 +0000
> Commit:     Navdeep Parhar <np@FreeBSD.org>
> CommitDate: 2025-03-16 01:16:42 +0000
> 
>     cxgbe(4): Perform Conventional Reset instead of FLR on the device.
>     
>     The driver uses bus_reset_child on its parent to reset itself but that
>     performs an FLR whereas the hardware needs a Conventional Reset[1] for
>     full re-initialization.  Add routines that perform conventional hot
>     reset and use them instead.  The available reset mechanisms are:
>     * PCIe secondary bus reset (default)
>     * PCIe link bounce
>     
>     hw.cxgbe.reset_method can be used to override the default.  The internal
>     PL_RST is also available but is for testing only.
>     
>     [1] 6.6.1 in PCI Express® Base Specification 5.0 version 1.0
>     
>     MFC after:      1 month
>     Sponsored by:   Chelsio Communications
> ---
>  sys/dev/cxgbe/t4_main.c | 137 +++++++++++++++++++++++++++++++++++++++++-------
>  1 file changed, 118 insertions(+), 19 deletions(-)
> 
> diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c
> index 6ee839151db0..20df6a97aa87 100644
> --- a/sys/dev/cxgbe/t4_main.c
> +++ b/sys/dev/cxgbe/t4_main.c
> @@ -633,6 +633,10 @@ static int t4_reset_on_fatal_err = 0;
>  SYSCTL_INT(_hw_cxgbe, OID_AUTO, reset_on_fatal_err, CTLFLAG_RWTUN,
>      &t4_reset_on_fatal_err, 0, "reset adapter on fatal errors");
>  
> +static int t4_reset_method = 1;
> +SYSCTL_INT(_hw_cxgbe, OID_AUTO, reset_method, CTLFLAG_RWTUN, &t4_reset_method,
> +    0, "reset method: 0 = PL_RST, 1 = PCIe secondary bus reset, 2 = PCIe link bounce");
> +
>  static int t4_clock_gate_on_suspend = 0;
>  SYSCTL_INT(_hw_cxgbe, OID_AUTO, clock_gate_on_suspend, CTLFLAG_RWTUN,
>      &t4_clock_gate_on_suspend, 0, "gate the clock on suspend");
> @@ -2535,40 +2539,135 @@ t4_reset_post(device_t dev, device_t child)
>  	return (0);
>  }
>  
> -static int
> -reset_adapter_with_pci_bus_reset(struct adapter *sc)
> -{
> -	int rc;
> -
> -	mtx_lock(&Giant);
> -	rc = BUS_RESET_CHILD(device_get_parent(sc->dev), sc->dev, 0);
> -	mtx_unlock(&Giant);
> -	return (rc);
> -}
> -
>  static int
>  reset_adapter_with_pl_rst(struct adapter *sc)
>  {
> -	suspend_adapter(sc);
> -
>  	/* This is a t4_write_reg without the hw_off_limits check. */
>  	MPASS(sc->error_flags & HW_OFF_LIMITS);
>  	bus_space_write_4(sc->bt, sc->bh, A_PL_RST,
>  			  F_PIORSTMODE | F_PIORST | F_AUTOPCIEPAUSE);
>  	pause("pl_rst", 1 * hz);		/* Wait 1s for reset */
> +	return (0);
> +}
>  
> -	resume_adapter(sc);
> +static int
> +reset_adapter_with_pcie_sbr(struct adapter *sc)
> +{
> +	device_t pdev = device_get_parent(sc->dev);
> +	device_t gpdev = device_get_parent(pdev);
> +	device_t *children;
> +	int rc, i, lcap, lsta, nchildren;
> +	uint32_t v;
>  
> -	return (0);
> +	rc = pci_find_cap(gpdev, PCIY_EXPRESS, &v);
> +	if (rc != 0) {
> +		CH_ERR(sc, "%s: pci_find_cap(%s, pcie) failed: %d\n", __func__,
> +		    device_get_nameunit(gpdev), rc);
> +		return (ENOTSUP);
> +	}
> +	lcap = v + PCIER_LINK_CAP;
> +	lsta = v + PCIER_LINK_STA;
> +
> +	nchildren = 0;
> +	device_get_children(pdev, &children, &nchildren);
> +	for (i = 0; i < nchildren; i++)
> +		pci_save_state(children[i]);
> +	v = pci_read_config(gpdev, PCIR_BRIDGECTL_1, 2);
> +	pci_write_config(gpdev, PCIR_BRIDGECTL_1, v | PCIB_BCR_SECBUS_RESET, 2);
> +	pause("pcie_sbr1", hz / 10);	/* 100ms */
> +	pci_write_config(gpdev, PCIR_BRIDGECTL_1, v, 2);
> +	pause("pcie_sbr2", hz);		/* Wait 1s before restore_state. */
> +	v = pci_read_config(gpdev, lsta, 2);
> +	if (pci_read_config(gpdev, lcap, 2) & PCIEM_LINK_CAP_DL_ACTIVE)
> +		rc = v & PCIEM_LINK_STA_DL_ACTIVE ? 0 : ETIMEDOUT;
> +	else if (v & (PCIEM_LINK_STA_TRAINING_ERROR | PCIEM_LINK_STA_TRAINING))
> +		rc = ETIMEDOUT;
> +	else
> +		rc = 0;
> +	if (rc != 0)
> +		CH_ERR(sc, "%s: PCIe link is down after reset, LINK_STA 0x%x\n",
> +		    __func__, v);
> +	else {
> +		for (i = 0; i < nchildren; i++)
> +			pci_restore_state(children[i]);
> +	}
> +	free(children, M_TEMP);
> +
> +	return (rc);
> +}
> +
> +static int
> +reset_adapter_with_pcie_link_bounce(struct adapter *sc)
We already have pcie_link_reset() that is used for BUS_RESET_CHILD()
on pcib.  See e.g. mlx5_core/mlx5_fwdump.c::mlx5_fw_reset() for its use.

We needed exactly a link bounce for that.