git: 0fb0306a89ad - stable/13 - x86: Support multiple PCI MCFG regions

From: John Baldwin <jhb_at_FreeBSD.org>
Date: Thu, 18 Jan 2024 23:39:24 UTC
The branch stable/13 has been updated by jhb:

URL: https://cgit.FreeBSD.org/src/commit/?id=0fb0306a89adf80f9d9af9044cb1f7af8a2f9155

commit 0fb0306a89adf80f9d9af9044cb1f7af8a2f9155
Author:     John Baldwin <jhb@FreeBSD.org>
AuthorDate: 2023-11-29 18:32:39 +0000
Commit:     John Baldwin <jhb@FreeBSD.org>
CommitDate: 2024-01-18 23:27:25 +0000

    x86: Support multiple PCI MCFG regions
    
    In particular, this enables support for PCI config access for domains
    (segments) other than 0.
    
    Reported by:    cperciva
    Tested by:      cperciva (m7i.metal-48xl AWS instance)
    Reviewed by:    imp
    Relnotes:       yes
    Differential Revision:  https://reviews.freebsd.org/D42828
    
    (cherry picked from commit f54a3890b1f419d4a9edc99000efef52b9071b8f)
---
 sys/amd64/pci/pci_cfgreg.c   | 122 ++++++++++++++++++++++++++-------------
 sys/dev/acpica/acpi.c        |   7 +--
 sys/i386/pci/pci_cfgreg.c    | 133 ++++++++++++++++++++++++++++++-------------
 sys/x86/include/pci_cfgreg.h |   2 +-
 sys/x86/x86/legacy.c         |   4 +-
 5 files changed, 181 insertions(+), 87 deletions(-)

diff --git a/sys/amd64/pci/pci_cfgreg.c b/sys/amd64/pci/pci_cfgreg.c
index dd177b6e9a8c..0f591cb0112a 100644
--- a/sys/amd64/pci/pci_cfgreg.c
+++ b/sys/amd64/pci/pci_cfgreg.c
@@ -34,6 +34,7 @@
 #include <sys/bus.h>
 #include <sys/lock.h>
 #include <sys/kernel.h>
+#include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/sysctl.h>
 #include <dev/pci/pcivar.h>
@@ -42,12 +43,21 @@
 #include <vm/pmap.h>
 #include <machine/pci_cfgreg.h>
 
+struct pcie_mcfg_region {
+	char *base;
+	uint16_t domain;
+	uint8_t minbus;
+	uint8_t maxbus;
+};
+
 static uint32_t	pci_docfgregread(int domain, int bus, int slot, int func,
 		    int reg, int bytes);
-static int	pciereg_cfgread(int domain, int bus, unsigned slot,
-		    unsigned func, unsigned reg, unsigned bytes);
-static void	pciereg_cfgwrite(int domain, int bus, unsigned slot,
-		    unsigned func, unsigned reg, int data, unsigned bytes);
+static struct pcie_mcfg_region *pcie_lookup_region(int domain, int bus);
+static int	pciereg_cfgread(struct pcie_mcfg_region *region, int bus,
+		    unsigned slot, unsigned func, unsigned reg, unsigned bytes);
+static void	pciereg_cfgwrite(struct pcie_mcfg_region *region, int bus,
+		    unsigned slot, unsigned func, unsigned reg, int data,
+		    unsigned bytes);
 static int	pcireg_cfgread(int bus, int slot, int func, int reg, int bytes);
 static void	pcireg_cfgwrite(int bus, int slot, int func, int reg, int data, int bytes);
 
@@ -60,11 +70,12 @@ SYSCTL_DECL(_hw_pci);
  */
 int cfgmech = CFGMECH_1;
 
-static vm_offset_t pcie_base;
-static int pcie_minbus, pcie_maxbus;
+static struct pcie_mcfg_region *mcfg_regions;
+static int mcfg_numregions;
 static uint32_t pcie_badslots;
 static struct mtx pcicfg_mtx;
 MTX_SYSINIT(pcicfg_mtx, &pcicfg_mtx, "pcicfg_mtx", MTX_SPIN);
+
 static int mcfg_enable = 1;
 SYSCTL_INT(_hw_pci, OID_AUTO, mcfg, CTLFLAG_RDTUN, &mcfg_enable, 0,
     "Enable support for PCI-e memory mapped config access");
@@ -76,16 +87,33 @@ pci_cfgregopen(void)
 	return (1);
 }
 
+static struct pcie_mcfg_region *
+pcie_lookup_region(int domain, int bus)
+{
+	for (int i = 0; i < mcfg_numregions; i++)
+		if (mcfg_regions[i].domain == domain &&
+		    bus >= mcfg_regions[i].minbus &&
+		    bus <= mcfg_regions[i].maxbus)
+			return (&mcfg_regions[i]);
+	return (NULL);
+}
+
 static uint32_t
 pci_docfgregread(int domain, int bus, int slot, int func, int reg, int bytes)
 {
 	if (domain == 0 && bus == 0 && (1 << slot & pcie_badslots) != 0)
 		return (pcireg_cfgread(bus, slot, func, reg, bytes));
 
-	if (cfgmech == CFGMECH_PCIE &&
-	    (bus >= pcie_minbus && bus <= pcie_maxbus))
-		return (pciereg_cfgread(domain, bus, slot, func, reg, bytes));
-	else if (domain == 0)
+	if (cfgmech == CFGMECH_PCIE) {
+		struct pcie_mcfg_region *region;
+
+		region = pcie_lookup_region(domain, bus);
+		if (region != NULL)
+			return (pciereg_cfgread(region, bus, slot, func, reg,
+			    bytes));
+	}
+
+	if (domain == 0)
 		return (pcireg_cfgread(bus, slot, func, reg, bytes));
 	else
 		return (-1);
@@ -130,10 +158,18 @@ pci_cfgregwrite_domain(int domain, int bus, int slot, int func, int reg,
 		return;
 	}
 
-	if (cfgmech == CFGMECH_PCIE &&
-	    (bus >= pcie_minbus && bus <= pcie_maxbus))
-		pciereg_cfgwrite(domain, bus, slot, func, reg, data, bytes);
-	else if (domain == 0)
+	if (cfgmech == CFGMECH_PCIE) {
+		struct pcie_mcfg_region *region;
+
+		region = pcie_lookup_region(domain, bus);
+		if (region != NULL) {
+			pciereg_cfgwrite(region, bus, slot, func, reg, data,
+			    bytes);
+			return;
+		}
+	}
+
+	if (domain == 0)
 		pcireg_cfgwrite(bus, slot, func, reg, data, bytes);
 }
 
@@ -219,7 +255,7 @@ pcireg_cfgwrite(int bus, int slot, int func, int reg, int data, int bytes)
 }
 
 static void
-pcie_init_badslots(void)
+pcie_init_badslots(struct pcie_mcfg_region *region)
 {
 	uint32_t val1, val2;
 	int slot;
@@ -236,7 +272,7 @@ pcie_init_badslots(void)
 			if (val1 == 0xffffffff)
 				continue;
 
-			val2 = pciereg_cfgread(0, 0, slot, 0, 0, 4);
+			val2 = pciereg_cfgread(region, 0, slot, 0, 0, 4);
 			if (val2 != val1)
 				pcie_badslots |= (1 << slot);
 		}
@@ -244,26 +280,34 @@ pcie_init_badslots(void)
 }
 
 int
-pcie_cfgregopen(uint64_t base, uint8_t minbus, uint8_t maxbus)
+pcie_cfgregopen(uint64_t base, uint16_t domain, uint8_t minbus, uint8_t maxbus)
 {
+	struct pcie_mcfg_region *region;
 
 	if (!mcfg_enable)
 		return (0);
 
-	if (minbus != 0)
-		return (0);
-
 	if (bootverbose)
-		printf("PCIe: Memory Mapped configuration base @ 0x%lx\n",
-		    base);
+		printf("PCI: MCFG domain %u bus %u-%u base @ 0x%lx\n",
+		    domain, minbus, maxbus, base);
+
+	/* Resize the array. */
+	mcfg_regions = realloc(mcfg_regions,
+	    sizeof(*mcfg_regions) * (mcfg_numregions + 1), M_DEVBUF, M_WAITOK);
+
+	region = &mcfg_regions[mcfg_numregions];
 
 	/* XXX: We should make sure this really fits into the direct map. */
-	pcie_base = (vm_offset_t)pmap_mapdev_pciecfg(base, (maxbus + 1) << 20);
-	pcie_minbus = minbus;
-	pcie_maxbus = maxbus;
+	region->base = pmap_mapdev_pciecfg(base, (maxbus + 1 - minbus) << 20);
+	region->domain = domain;
+	region->minbus = minbus;
+	region->maxbus = maxbus;
+	mcfg_numregions++;
+
 	cfgmech = CFGMECH_PCIE;
 
-	pcie_init_badslots();
+	if (domain == 0 && minbus == 0)
+		pcie_init_badslots(region);
 
 	return (1);
 }
@@ -284,17 +328,18 @@ pcie_cfgregopen(uint64_t base, uint8_t minbus, uint8_t maxbus)
  */
 
 static int
-pciereg_cfgread(int domain, int bus, unsigned slot, unsigned func, unsigned reg,
-    unsigned bytes)
+pciereg_cfgread(struct pcie_mcfg_region *region, int bus, unsigned slot,
+    unsigned func, unsigned reg, unsigned bytes)
 {
-	vm_offset_t va;
+	char *va;
 	int data = -1;
 
-	if (domain != 0 || bus < pcie_minbus || bus > pcie_maxbus ||
-	    slot > PCI_SLOTMAX || func > PCI_FUNCMAX || reg > PCIE_REGMAX)
+	MPASS(bus >= region->minbus && bus <= region->maxbus);
+
+	if (slot > PCI_SLOTMAX || func > PCI_FUNCMAX || reg > PCIE_REGMAX)
 		return (-1);
 
-	va = PCIE_VADDR(pcie_base, reg, bus, slot, func);
+	va = PCIE_VADDR(region->base, reg, bus - region->minbus, slot, func);
 
 	switch (bytes) {
 	case 4:
@@ -315,16 +360,17 @@ pciereg_cfgread(int domain, int bus, unsigned slot, unsigned func, unsigned reg,
 }
 
 static void
-pciereg_cfgwrite(int domain, int bus, unsigned slot, unsigned func,
-    unsigned reg, int data, unsigned bytes)
+pciereg_cfgwrite(struct pcie_mcfg_region *region, int bus, unsigned slot,
+    unsigned func, unsigned reg, int data, unsigned bytes)
 {
-	vm_offset_t va;
+	char *va;
+
+	MPASS(bus >= region->minbus && bus <= region->maxbus);
 
-	if (domain != 0 || bus < pcie_minbus || bus > pcie_maxbus ||
-	    slot > PCI_SLOTMAX || func > PCI_FUNCMAX || reg > PCIE_REGMAX)
+	if (slot > PCI_SLOTMAX || func > PCI_FUNCMAX || reg > PCIE_REGMAX)
 		return;
 
-	va = PCIE_VADDR(pcie_base, reg, bus, slot, func);
+	va = PCIE_VADDR(region->base, reg, bus - region->minbus, slot, func);
 
 	switch (bytes) {
 	case 4:
diff --git a/sys/dev/acpica/acpi.c b/sys/dev/acpica/acpi.c
index ca88acb60966..fbb3c2bc7c1c 100644
--- a/sys/dev/acpica/acpi.c
+++ b/sys/dev/acpica/acpi.c
@@ -2091,11 +2091,8 @@ acpi_enable_pcie(void)
 	end = (ACPI_MCFG_ALLOCATION *)((char *)hdr + hdr->Length);
 	alloc = (ACPI_MCFG_ALLOCATION *)((ACPI_TABLE_MCFG *)hdr + 1);
 	while (alloc < end) {
-		if (alloc->PciSegment == 0) {
-			pcie_cfgregopen(alloc->Address, alloc->StartBusNumber,
-			    alloc->EndBusNumber);
-			return;
-		}
+		pcie_cfgregopen(alloc->Address, alloc->PciSegment,
+		    alloc->StartBusNumber, alloc->EndBusNumber);
 		alloc++;
 	}
 #endif
diff --git a/sys/i386/pci/pci_cfgreg.c b/sys/i386/pci/pci_cfgreg.c
index 2129782063d3..231205dde28c 100644
--- a/sys/i386/pci/pci_cfgreg.c
+++ b/sys/i386/pci/pci_cfgreg.c
@@ -55,6 +55,13 @@
 		printf a ;						\
 } while(0)
 
+struct pcie_mcfg_region {
+	uint64_t base;
+	uint16_t domain;
+	uint8_t minbus;
+	uint8_t maxbus;
+};
+
 #define PCIE_CACHE 8
 struct pcie_cfg_elem {
 	TAILQ_ENTRY(pcie_cfg_elem)	elem;
@@ -64,26 +71,30 @@ struct pcie_cfg_elem {
 
 SYSCTL_DECL(_hw_pci);
 
+static struct pcie_mcfg_region *mcfg_regions;
+static int mcfg_numregions;
 static TAILQ_HEAD(pcie_cfg_list, pcie_cfg_elem) pcie_list[MAXCPU];
-static uint64_t pcie_base;
-static int pcie_minbus, pcie_maxbus;
+static int pcie_cache_initted;
 static uint32_t pcie_badslots;
 int cfgmech;
 static int devmax;
 static struct mtx pcicfg_mtx;
+
 static int mcfg_enable = 1;
 SYSCTL_INT(_hw_pci, OID_AUTO, mcfg, CTLFLAG_RDTUN, &mcfg_enable, 0,
     "Enable support for PCI-e memory mapped config access");
 
 static uint32_t	pci_docfgregread(int domain, int bus, int slot, int func,
 		    int reg, int bytes);
+static struct pcie_mcfg_region *pcie_lookup_region(int domain, int bus);
 static int	pcireg_cfgread(int bus, int slot, int func, int reg, int bytes);
 static void	pcireg_cfgwrite(int bus, int slot, int func, int reg, int data, int bytes);
 static int	pcireg_cfgopen(void);
-static int	pciereg_cfgread(int domain, int bus, unsigned slot,
-		    unsigned func, unsigned reg, unsigned bytes);
-static void	pciereg_cfgwrite(int domain, int bus, unsigned slot,
-		    unsigned func, unsigned reg, int data, unsigned bytes);
+static int	pciereg_cfgread(struct pcie_mcfg_region *region, int bus,
+		    unsigned slot, unsigned func, unsigned reg, unsigned bytes);
+static void	pciereg_cfgwrite(struct pcie_mcfg_region *region, int bus,
+		    unsigned slot, unsigned func, unsigned reg, int data,
+		    unsigned bytes);
 
 /*
  * Some BIOS writers seem to want to ignore the spec and put
@@ -150,16 +161,33 @@ pci_cfgregopen(void)
 	return (1);
 }
 
+static struct pcie_mcfg_region *
+pcie_lookup_region(int domain, int bus)
+{
+	for (int i = 0; i < mcfg_numregions; i++)
+		if (mcfg_regions[i].domain == domain &&
+		    bus >= mcfg_regions[i].minbus &&
+		    bus <= mcfg_regions[i].maxbus)
+			return (&mcfg_regions[i]);
+	return (NULL);
+}
+
 static uint32_t
 pci_docfgregread(int domain, int bus, int slot, int func, int reg, int bytes)
 {
 	if (domain == 0 && bus == 0 && (1 << slot & pcie_badslots) != 0)
 		return (pcireg_cfgread(bus, slot, func, reg, bytes));
 
-	if (cfgmech == CFGMECH_PCIE &&
-	    (bus >= pcie_minbus && bus <= pcie_maxbus))
-		return (pciereg_cfgread(domain, bus, slot, func, reg, bytes));
-	else if (domain == 0)
+	if (cfgmech == CFGMECH_PCIE) {
+		struct pcie_mcfg_region *region;
+
+		region = pcie_lookup_region(domain, bus);
+		if (region != NULL)
+			return (pciereg_cfgread(region, bus, slot, func, reg,
+			    bytes));
+	}
+
+	if (domain == 0)
 		return (pcireg_cfgread(bus, slot, func, reg, bytes));
 	else
 		return (-1);
@@ -198,10 +226,18 @@ pci_cfgregwrite(int domain, int bus, int slot, int func, int reg, uint32_t data,
 		return;
 	}
 
-	if (cfgmech == CFGMECH_PCIE &&
-	    (bus >= pcie_minbus && bus <= pcie_maxbus))
-		pciereg_cfgwrite(domain, bus, slot, func, reg, data, bytes);
-	else if (domain == 0)
+	if (cfgmech == CFGMECH_PCIE) {
+		struct pcie_mcfg_region *region;
+
+		region = pcie_lookup_region(domain, bus);
+		if (region != NULL) {
+			pciereg_cfgwrite(region, bus, slot, func, reg, data,
+			    bytes);
+			return;
+		}
+	}
+
+	if (domain == 0)
 		pcireg_cfgwrite(bus, slot, func, reg, data, bytes);
 }
 
@@ -480,7 +516,7 @@ pcie_init_cache(void)
 }
 
 static void
-pcie_init_badslots(void)
+pcie_init_badslots(struct pcie_mcfg_region *region)
 {
 	uint32_t val1, val2;
 	int slot;
@@ -497,7 +533,7 @@ pcie_init_badslots(void)
 			if (val1 == 0xffffffff)
 				continue;
 
-			val2 = pciereg_cfgread(0, 0, slot, 0, 0, 4);
+			val2 = pciereg_cfgread(region, 0, slot, 0, 0, 4);
 			if (val2 != val1)
 				pcie_badslots |= (1 << slot);
 		}
@@ -505,37 +541,51 @@ pcie_init_badslots(void)
 }
 
 int
-pcie_cfgregopen(uint64_t base, uint8_t minbus, uint8_t maxbus)
+pcie_cfgregopen(uint64_t base, uint16_t domain, uint8_t minbus, uint8_t maxbus)
 {
+	struct pcie_mcfg_region *region;
 
 	if (!mcfg_enable)
 		return (0);
 
-	if (minbus != 0)
-		return (0);
-
 	if (!pae_mode && base >= 0x100000000) {
 		if (bootverbose)
 			printf(
-	    "PCI: Memory Mapped PCI configuration area base 0x%jx too high\n",
-			    (uintmax_t)base);
+	    "PCI: MCFG domain %u bus %u-%u base 0x%jx too high\n",
+			domain, minbus, maxbus, (uintmax_t)base);
 		return (0);
 	}
 
 	if (bootverbose)
-		printf("PCIe: Memory Mapped configuration base @ 0x%jx\n",
-		    (uintmax_t)base);
+		printf("PCI: MCFG domain %u bus %u-%u base @ 0x%jx\n",
+		    domain, minbus, maxbus, (uintmax_t)base);
+
+	if (pcie_cache_initted == 0) {
+		if (!pcie_init_cache())
+			pcie_cache_initted = -1;
+		else
+			pcie_cache_initted = 1;
+	}
 
-	if (!pcie_init_cache())
+	if (pcie_cache_initted == -1)
 		return (0);
 
-	pcie_base = base;
-	pcie_minbus = minbus;
-	pcie_maxbus = maxbus;
+	/* Resize the array. */
+	mcfg_regions = realloc(mcfg_regions,
+	    sizeof(*mcfg_regions) * (mcfg_numregions + 1), M_DEVBUF, M_WAITOK);
+
+	region = &mcfg_regions[mcfg_numregions];
+	region->base = base;
+	region->domain = domain;
+	region->minbus = minbus;
+	region->maxbus = maxbus;
+	mcfg_numregions++;
+
 	cfgmech = CFGMECH_PCIE;
 	devmax = 32;
 
-	pcie_init_badslots();
+	if (domain == 0 && minbus == 0)
+		pcie_init_badslots(region);
 
 	return (1);
 }
@@ -548,13 +598,16 @@ pcie_cfgregopen(uint64_t base, uint8_t minbus, uint8_t maxbus)
 	((reg) & 0xfff)))
 
 static __inline vm_offset_t
-pciereg_findaddr(int bus, unsigned slot, unsigned func, unsigned reg)
+pciereg_findaddr(struct pcie_mcfg_region *region, int bus, unsigned slot,
+    unsigned func, unsigned reg)
 {
 	struct pcie_cfg_list *pcielist;
 	struct pcie_cfg_elem *elem;
 	vm_paddr_t pa, papage;
 
-	pa = PCIE_PADDR(pcie_base, reg, bus, slot, func);
+	MPASS(bus >= region->minbus && bus <= region->maxbus);
+
+	pa = PCIE_PADDR(region->base, reg, bus - region->minbus, slot, func);
 	papage = pa & ~PAGE_MASK;
 
 	/*
@@ -595,18 +648,17 @@ pciereg_findaddr(int bus, unsigned slot, unsigned func, unsigned reg)
  */
 
 static int
-pciereg_cfgread(int domain, int bus, unsigned slot, unsigned func, unsigned reg,
-    unsigned bytes)
+pciereg_cfgread(struct pcie_mcfg_region *region, int bus, unsigned slot,
+    unsigned func, unsigned reg, unsigned bytes)
 {
 	vm_offset_t va;
 	int data = -1;
 
-	if (domain != 0 || bus < pcie_minbus || bus > pcie_maxbus ||
-	    slot > PCI_SLOTMAX || func > PCI_FUNCMAX || reg > PCIE_REGMAX)
+	if (slot > PCI_SLOTMAX || func > PCI_FUNCMAX || reg > PCIE_REGMAX)
 		return (-1);
 
 	critical_enter();
-	va = pciereg_findaddr(bus, slot, func, reg);
+	va = pciereg_findaddr(region, bus, slot, func, reg);
 
 	switch (bytes) {
 	case 4:
@@ -628,17 +680,16 @@ pciereg_cfgread(int domain, int bus, unsigned slot, unsigned func, unsigned reg,
 }
 
 static void
-pciereg_cfgwrite(int domain, int bus, unsigned slot, unsigned func,
-    unsigned reg, int data, unsigned bytes)
+pciereg_cfgwrite(struct pcie_mcfg_region *region, int bus, unsigned slot,
+    unsigned func, unsigned reg, int data, unsigned bytes)
 {
 	vm_offset_t va;
 
-	if (domain != 0 || bus < pcie_minbus || bus > pcie_maxbus ||
-	    slot > PCI_SLOTMAX || func > PCI_FUNCMAX || reg > PCIE_REGMAX)
+	if (slot > PCI_SLOTMAX || func > PCI_FUNCMAX || reg > PCIE_REGMAX)
 		return;
 
 	critical_enter();
-	va = pciereg_findaddr(bus, slot, func, reg);
+	va = pciereg_findaddr(region, bus, slot, func, reg);
 
 	switch (bytes) {
 	case 4:
diff --git a/sys/x86/include/pci_cfgreg.h b/sys/x86/include/pci_cfgreg.h
index 680a4551af07..f80cfe2e49f4 100644
--- a/sys/x86/include/pci_cfgreg.h
+++ b/sys/x86/include/pci_cfgreg.h
@@ -56,7 +56,7 @@ enum {
 extern int cfgmech;
 
 rman_res_t	hostb_alloc_start(int type, rman_res_t start, rman_res_t end, rman_res_t count);
-int		pcie_cfgregopen(uint64_t base, uint8_t minbus, uint8_t maxbus);
+int		pcie_cfgregopen(uint64_t base, uint16_t domain, uint8_t minbus, uint8_t maxbus);
 int		pci_cfgregopen(void);
 u_int32_t	pci_cfgregread_domain(int domain, int bus, int slot, int func, int reg, int bytes);
 void		pci_cfgregwrite_domain(int domain, int bus, int slot, int func, int reg, u_int32_t data, int bytes);
diff --git a/sys/x86/x86/legacy.c b/sys/x86/x86/legacy.c
index 0921f2eb2db2..3a5e878be7ed 100644
--- a/sys/x86/x86/legacy.c
+++ b/sys/x86/x86/legacy.c
@@ -134,14 +134,14 @@ legacy_pci_cfgregopen(device_t dev)
 		case 0x3592:
 			/* Intel 7520 or 7320 */
 			pciebar = pci_cfgregread(0, 0, 0, 0, 0xce, 2) << 16;
-			pcie_cfgregopen(pciebar, 0, 255);
+			pcie_cfgregopen(pciebar, 0, 0, 255);
 			break;
 		case 0x2580:
 		case 0x2584:
 		case 0x2590:
 			/* Intel 915, 925, or 915GM */
 			pciebar = pci_cfgregread(0, 0, 0, 0, 0x48, 4);
-			pcie_cfgregopen(pciebar, 0, 255);
+			pcie_cfgregopen(pciebar, 0, 0, 255);
 			break;
 		}
 	}