git: 346020138a0f - main - pci: Don't cache the count of MSI/MSI-X messages before allocation

From: John Baldwin <jhb_at_FreeBSD.org>
Date: Tue, 11 Feb 2025 14:12:24 UTC
The branch main has been updated by jhb:

URL: https://cgit.FreeBSD.org/src/commit/?id=346020138a0fd20085ebc285f090df38d7d18527

commit 346020138a0fd20085ebc285f090df38d7d18527
Author:     John Baldwin <jhb@FreeBSD.org>
AuthorDate: 2025-02-11 14:11:48 +0000
Commit:     John Baldwin <jhb@FreeBSD.org>
CommitDate: 2025-02-11 14:11:48 +0000

    pci: Don't cache the count of MSI/MSI-X messages before allocation
    
    A device can in theory change the read-only fields in the MSI/MSI-X
    control registers that indicate the maximum number of supported
    registers in response to changing other device registers.  For
    example, certain Intel networking VFs change the number of messages as
    a result of changes in the PCI_IOV_ADD_VF callback.
    
    To support this, always read the current value of the relevant control
    register in the *_count and *_alloc methods.  Once messages have been
    allocated, the control register value remains cached.
    
    Reported by:    Krzysztof Galazka <krzysztof.galazka@intel.com>
    Reviewed by:    Krzysztof Galazka <krzysztof.galazka@intel.com>, erj
    Differential Revision:  https://reviews.freebsd.org/D48890
---
 sys/dev/iavf/iavf_lib.c      | 25 ----------------
 sys/dev/iavf/iavf_lib.h      |  1 -
 sys/dev/iavf/if_iavf_iflib.c |  3 --
 sys/dev/pci/pci.c            | 70 ++++++++++++++++++++++++++------------------
 sys/dev/pci/pcireg.h         |  3 ++
 sys/dev/pci/pcivar.h         |  2 --
 6 files changed, 45 insertions(+), 59 deletions(-)

diff --git a/sys/dev/iavf/iavf_lib.c b/sys/dev/iavf/iavf_lib.c
index 3116ce0501c2..433d31904ea4 100644
--- a/sys/dev/iavf/iavf_lib.c
+++ b/sys/dev/iavf/iavf_lib.c
@@ -1463,31 +1463,6 @@ iavf_mark_del_vlan_filter(struct iavf_sc *sc, u16 vtag)
 	return (i);
 }
 
-/**
- * iavf_update_msix_devinfo - Fix MSIX values for pci_msix_count()
- * @dev: pointer to kernel device
- *
- * Fix cached MSI-X control register information. This is a workaround
- * for an issue where VFs spawned in non-passthrough mode on FreeBSD
- * will have their PCI information cached before the PF driver
- * finishes updating their PCI information.
- *
- * @pre Must be called before pci_msix_count()
- */
-void
-iavf_update_msix_devinfo(device_t dev)
-{
-	struct pci_devinfo *dinfo;
-	u32 msix_ctrl;
-	u8 msix_location;
-
-	dinfo = (struct pci_devinfo *)device_get_ivars(dev);
-	msix_location = dinfo->cfg.msix.msix_location;
-	msix_ctrl = pci_read_config(dev, msix_location + PCIR_MSIX_CTRL, 2);
-	dinfo->cfg.msix.msix_ctrl = msix_ctrl;
-	dinfo->cfg.msix.msix_msgnum = (msix_ctrl & PCIM_MSIXCTRL_TABLE_SIZE) + 1;
-}
-
 /**
  * iavf_disable_queues_with_retries - Send PF multiple DISABLE_QUEUES messages
  * @sc: device softc
diff --git a/sys/dev/iavf/iavf_lib.h b/sys/dev/iavf/iavf_lib.h
index f3ccd9f0c52f..2f874b2e4410 100644
--- a/sys/dev/iavf/iavf_lib.h
+++ b/sys/dev/iavf/iavf_lib.h
@@ -474,7 +474,6 @@ struct iavf_mac_filter *
 u64 iavf_baudrate_from_link_speed(struct iavf_sc *sc);
 void iavf_add_vlan_filter(struct iavf_sc *sc, u16 vtag);
 int iavf_mark_del_vlan_filter(struct iavf_sc *sc, u16 vtag);
-void iavf_update_msix_devinfo(device_t dev);
 void iavf_disable_queues_with_retries(struct iavf_sc *);
 
 int iavf_sysctl_current_speed(SYSCTL_HANDLER_ARGS);
diff --git a/sys/dev/iavf/if_iavf_iflib.c b/sys/dev/iavf/if_iavf_iflib.c
index d460df6e0d25..e4dd3b1e59a4 100644
--- a/sys/dev/iavf/if_iavf_iflib.c
+++ b/sys/dev/iavf/if_iavf_iflib.c
@@ -379,9 +379,6 @@ iavf_if_attach_pre(if_ctx_t ctx)
 	scctx->isc_capabilities = scctx->isc_capenable = IAVF_CAPS;
 	scctx->isc_tx_csum_flags = CSUM_OFFLOAD;
 
-	/* Update OS cache of MSIX control register values */
-	iavf_update_msix_devinfo(dev);
-
 	return (0);
 
 err_vc_tq:
diff --git a/sys/dev/pci/pci.c b/sys/dev/pci/pci.c
index 06bff2e96e89..ad13e7f9a3a4 100644
--- a/sys/dev/pci/pci.c
+++ b/sys/dev/pci/pci.c
@@ -952,14 +952,10 @@ pci_read_cap(device_t pcib, pcicfgregs *cfg)
 		case PCIY_MSI:		/* PCI MSI */
 			cfg->msi.msi_location = ptr;
 			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
-			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
-						     PCIM_MSICTRL_MMC_MASK)>>1);
 			break;
 		case PCIY_MSIX:		/* PCI MSI-X */
 			cfg->msix.msix_location = ptr;
 			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
-			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
-			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
 			val = REG(ptr + PCIR_MSIX_TABLE, 4);
 			cfg->msix.msix_table_bar = PCIR_BAR(val &
 			    PCIM_MSIX_BIR_MASK);
@@ -1734,7 +1730,7 @@ pci_mask_msix(device_t dev, u_int index)
 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
 	uint32_t offset, val;
 
-	KASSERT(msix->msix_msgnum > index, ("bogus index"));
+	KASSERT(PCI_MSIX_MSGNUM(msix->msix_ctrl) > index, ("bogus index"));
 	offset = msix->msix_table_offset + index * 16 + 12;
 	val = bus_read_4(msix->msix_table_res, offset);
 	val |= PCIM_MSIX_VCTRL_MASK;
@@ -1753,7 +1749,7 @@ pci_unmask_msix(device_t dev, u_int index)
 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
 	uint32_t offset, val;
 
-	KASSERT(msix->msix_table_len > index, ("bogus index"));
+	KASSERT(PCI_MSIX_MSGNUM(msix->msix_ctrl) > index, ("bogus index"));
 	offset = msix->msix_table_offset + index * 16 + 12;
 	val = bus_read_4(msix->msix_table_res, offset);
 	val &= ~PCIM_MSIX_VCTRL_MASK;
@@ -1790,11 +1786,13 @@ pci_resume_msix(device_t dev)
 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
 	struct msix_table_entry *mte;
 	struct msix_vector *mv;
-	u_int i;
+	u_int i, msgnum;
 
 	if (msix->msix_alloc > 0) {
+		msgnum = PCI_MSIX_MSGNUM(msix->msix_ctrl);
+
 		/* First, mask all vectors. */
-		for (i = 0; i < msix->msix_msgnum; i++)
+		for (i = 0; i < msgnum; i++)
 			pci_mask_msix(dev, i);
 
 		/* Second, program any messages with at least one handler. */
@@ -1825,6 +1823,7 @@ pci_alloc_msix_method(device_t dev, device_t child, int *count)
 	struct resource_list_entry *rle;
 	u_int actual, i, max;
 	int error, irq;
+	uint16_t ctrl, msgnum;
 
 	/* Don't let count == 0 get us into trouble. */
 	if (*count < 1)
@@ -1863,11 +1862,14 @@ pci_alloc_msix_method(device_t dev, device_t child, int *count)
 	}
 	cfg->msix.msix_pba_res = rle->res;
 
+	ctrl = pci_read_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
+	    2);
+	msgnum = PCI_MSIX_MSGNUM(ctrl);
 	if (bootverbose)
 		device_printf(child,
 		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
-		    *count, cfg->msix.msix_msgnum);
-	max = min(*count, cfg->msix.msix_msgnum);
+		    *count, msgnum);
+	max = min(*count, msgnum);
 	for (i = 0; i < max; i++) {
 		/* Allocate a message. */
 		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
@@ -1927,7 +1929,7 @@ pci_alloc_msix_method(device_t dev, device_t child, int *count)
 	}
 
 	/* Mask all vectors. */
-	for (i = 0; i < cfg->msix.msix_msgnum; i++)
+	for (i = 0; i < msgnum; i++)
 		pci_mask_msix(child, i);
 
 	/* Allocate and initialize vector data and virtual table. */
@@ -1942,9 +1944,10 @@ pci_alloc_msix_method(device_t dev, device_t child, int *count)
 	}
 
 	/* Update control register to enable MSI-X. */
-	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
+	ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
 	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
-	    cfg->msix.msix_ctrl, 2);
+	    ctrl, 2);
+	cfg->msix.msix_ctrl = ctrl;
 
 	/* Update counts of alloc'd messages. */
 	cfg->msix.msix_alloc = actual;
@@ -2007,7 +2010,7 @@ pci_remap_msix_method(device_t dev, device_t child, int count,
 	 * table can't be bigger than the actual MSI-X table in the
 	 * device.
 	 */
-	if (count < 1 || count > msix->msix_msgnum)
+	if (count < 1 || count > PCI_MSIX_MSGNUM(msix->msix_ctrl))
 		return (EINVAL);
 
 	/* Sanity check the vectors. */
@@ -2173,9 +2176,13 @@ pci_msix_count_method(device_t dev, device_t child)
 {
 	struct pci_devinfo *dinfo = device_get_ivars(child);
 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
+	uint16_t ctrl;
 
-	if (pci_do_msix && msix->msix_location != 0)
-		return (msix->msix_msgnum);
+	if (pci_do_msix && msix->msix_location != 0) {
+		ctrl = pci_read_config(child, msix->msix_location +
+		    PCIR_MSI_CTRL, 2);
+		return (PCI_MSIX_MSGNUM(ctrl));
+	}
 	return (0);
 }
 
@@ -2610,7 +2617,7 @@ pci_alloc_msi_method(device_t dev, device_t child, int *count)
 	struct resource_list_entry *rle;
 	u_int actual, i;
 	int error, irqs[32];
-	uint16_t ctrl;
+	uint16_t ctrl, msgnum;
 
 	/* Don't let count == 0 get us into trouble. */
 	if (*count < 1)
@@ -2633,13 +2640,15 @@ pci_alloc_msi_method(device_t dev, device_t child, int *count)
 	if (cfg->msi.msi_location == 0 || !pci_do_msi)
 		return (ENODEV);
 
+	ctrl = pci_read_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, 2);
+	msgnum = PCI_MSI_MSGNUM(ctrl);
 	if (bootverbose)
 		device_printf(child,
-		    "attempting to allocate %d MSI vectors (%d supported)\n",
-		    *count, cfg->msi.msi_msgnum);
+		    "attempting to allocate %d MSI vectors (%u supported)\n",
+		    *count, msgnum);
 
 	/* Don't ask for more than the device supports. */
-	actual = min(*count, cfg->msi.msi_msgnum);
+	actual = min(*count, msgnum);
 
 	/* Don't ask for more than 32 messages. */
 	actual = min(actual, 32);
@@ -2708,7 +2717,6 @@ pci_alloc_msi_method(device_t dev, device_t child, int *count)
 	}
 
 	/* Update control register with actual count. */
-	ctrl = cfg->msi.msi_ctrl;
 	ctrl &= ~PCIM_MSICTRL_MME_MASK;
 	ctrl |= (ffs(actual) - 1) << 4;
 	cfg->msi.msi_ctrl = ctrl;
@@ -2782,9 +2790,13 @@ pci_msi_count_method(device_t dev, device_t child)
 {
 	struct pci_devinfo *dinfo = device_get_ivars(child);
 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
+	uint16_t ctrl;
 
-	if (pci_do_msi && msi->msi_location != 0)
-		return (msi->msi_msgnum);
+	if (pci_do_msi && msi->msi_location != 0) {
+		ctrl = pci_read_config(child, msi->msi_location + PCIR_MSI_CTRL,
+		    2);
+		return (PCI_MSI_MSGNUM(ctrl));
+	}
 	return (0);
 }
 
@@ -3038,19 +3050,21 @@ pci_print_verbose(struct pci_devinfo *dinfo)
 			    status & PCIM_PSTAT_DMASK);
 		}
 		if (cfg->msi.msi_location) {
-			int ctrl;
+			uint16_t ctrl, msgnum;
 
 			ctrl = cfg->msi.msi_ctrl;
+			msgnum = PCI_MSI_MSGNUM(ctrl);
 			printf("\tMSI supports %d message%s%s%s\n",
-			    cfg->msi.msi_msgnum,
-			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
+			    msgnum, (msgnum == 1) ? "" : "s",
 			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
 			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
 		}
 		if (cfg->msix.msix_location) {
+			uint16_t msgnum;
+
+			msgnum = PCI_MSIX_MSGNUM(cfg->msix.msix_ctrl);
 			printf("\tMSI-X supports %d message%s ",
-			    cfg->msix.msix_msgnum,
-			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
+			    msgnum, (msgnum == 1) ? "" : "s");
 			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
 				printf("in map 0x%x\n",
 				    cfg->msix.msix_table_bar);
diff --git a/sys/dev/pci/pcireg.h b/sys/dev/pci/pcireg.h
index 623deb8b4505..f6aaf30611e4 100644
--- a/sys/dev/pci/pcireg.h
+++ b/sys/dev/pci/pcireg.h
@@ -616,6 +616,8 @@
 #define	PCIM_MSICTRL_MMC_16		0x0008
 #define	PCIM_MSICTRL_MMC_32		0x000A
 #define	PCIM_MSICTRL_MSI_ENABLE		0x0001
+#define	PCI_MSI_MSGNUM(ctrl)						\
+	(1 << (((ctrl) & PCIM_MSICTRL_MMC_MASK) >> 1))
 #define	PCIR_MSI_ADDR		0x4
 #define	PCIR_MSI_ADDR_HIGH	0x8
 #define	PCIR_MSI_DATA		0x8
@@ -965,6 +967,7 @@
 #define	PCIM_MSIXCTRL_MSIX_ENABLE	0x8000
 #define	PCIM_MSIXCTRL_FUNCTION_MASK	0x4000
 #define	PCIM_MSIXCTRL_TABLE_SIZE	0x07FF
+#define	PCI_MSIX_MSGNUM(ctrl)	(((ctrl) & PCIM_MSIXCTRL_TABLE_SIZE) + 1)
 #define	PCIR_MSIX_TABLE		0x4
 #define	PCIR_MSIX_PBA		0x8
 #define	PCIM_MSIX_BIR_MASK		0x7
diff --git a/sys/dev/pci/pcivar.h b/sys/dev/pci/pcivar.h
index 0041c5a22b49..c0d54f8e58e0 100644
--- a/sys/dev/pci/pcivar.h
+++ b/sys/dev/pci/pcivar.h
@@ -90,7 +90,6 @@ struct pcicfg_vpd {
 struct pcicfg_msi {
     uint16_t	msi_ctrl;	/* Message Control */
     uint8_t	msi_location;	/* Offset of MSI capability registers. */
-    uint8_t	msi_msgnum;	/* Number of messages */
     int		msi_alloc;	/* Number of allocated messages. */
     uint64_t	msi_addr;	/* Contents of address register. */
     uint16_t	msi_data;	/* Contents of data register. */
@@ -111,7 +110,6 @@ struct msix_table_entry {
 
 struct pcicfg_msix {
     uint16_t	msix_ctrl;	/* Message Control */
-    uint16_t	msix_msgnum;	/* Number of messages */
     uint8_t	msix_location;	/* Offset of MSI-X capability registers. */
     uint8_t	msix_table_bar;	/* BAR containing vector table. */
     uint8_t	msix_pba_bar;	/* BAR containing PBA. */