git: 705090cba428 - main - DMAR: handle affinity for in-memory data structures

From: Konstantin Belousov <kib_at_FreeBSD.org>
Date: Wed, 23 Oct 2024 20:00:40 UTC
The branch main has been updated by kib:

URL: https://cgit.FreeBSD.org/src/commit/?id=705090cba428525507fd0e8597cbf1cf8a68ae35

commit 705090cba428525507fd0e8597cbf1cf8a68ae35
Author:     Konstantin Belousov <kib@FreeBSD.org>
AuthorDate: 2024-10-22 01:03:25 +0000
Commit:     Konstantin Belousov <kib@FreeBSD.org>
CommitDate: 2024-10-23 20:00:10 +0000

    DMAR: handle affinity for in-memory data structures
    
    Reviewed and tested by: markj
    Sponsored by:   Advanced Micro Devices (AMD)
    Sponsored by:   The FreeBSD Foundation
    MFC after:      1 week
    Differential revision:  https://reviews.freebsd.org/D47254
---
 sys/x86/iommu/intel_dmar.h    |  1 +
 sys/x86/iommu/intel_drv.c     | 33 +++++++++++++++++++++++++++++++++
 sys/x86/iommu/intel_idpgtbl.c | 17 ++++++++++++++---
 sys/x86/iommu/intel_intrmap.c | 19 +++++++++++++++----
 4 files changed, 63 insertions(+), 7 deletions(-)

diff --git a/sys/x86/iommu/intel_dmar.h b/sys/x86/iommu/intel_dmar.h
index c3163abf6f92..1a9b5041975c 100644
--- a/sys/x86/iommu/intel_dmar.h
+++ b/sys/x86/iommu/intel_dmar.h
@@ -109,6 +109,7 @@ struct dmar_unit {
 	struct x86_unit_common x86c;
 	uint16_t segment;
 	uint64_t base;
+	int memdomain;
 
 	/* Resources */
 	int reg_rid;
diff --git a/sys/x86/iommu/intel_drv.c b/sys/x86/iommu/intel_drv.c
index f4a1ec06b721..22d04029f9ae 100644
--- a/sys/x86/iommu/intel_drv.c
+++ b/sys/x86/iommu/intel_drv.c
@@ -38,6 +38,7 @@
 
 #include <sys/param.h>
 #include <sys/bus.h>
+#include <sys/domainset.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
@@ -325,12 +326,34 @@ dmar_print_caps(device_t dev, struct dmar_unit *unit,
 	    DMAR_ECAP_IRO(unit->hw_ecap));
 }
 
+/* Remapping Hardware Static Affinity Structure lookup */
+struct rhsa_iter_arg {
+	uint64_t base;
+	u_int proxim_dom;
+};
+
+static int
+dmar_rhsa_iter(ACPI_DMAR_HEADER *dmarh, void *arg)
+{
+	struct rhsa_iter_arg *ria;
+	ACPI_DMAR_RHSA *adr;
+
+	if (dmarh->Type == ACPI_DMAR_TYPE_HARDWARE_AFFINITY) {
+		ria = arg;
+		adr = (ACPI_DMAR_RHSA *)dmarh;
+		if (adr->BaseAddress == ria->base)
+			ria->proxim_dom = adr->ProximityDomain;
+	}
+	return (1);
+}
+
 static int
 dmar_attach(device_t dev)
 {
 	struct dmar_unit *unit;
 	ACPI_DMAR_HARDWARE_UNIT *dmaru;
 	struct iommu_msi_data *dmd;
+	struct rhsa_iter_arg ria;
 	uint64_t timeout;
 	int disable_pmr;
 	int i, error;
@@ -358,6 +381,12 @@ dmar_attach(device_t dev)
 	if (bootverbose)
 		dmar_print_caps(dev, unit, dmaru);
 	dmar_quirks_post_ident(unit);
+	unit->memdomain = -1;
+	ria.base = unit->base;
+	ria.proxim_dom = -1;
+	dmar_iterate_tbl(dmar_rhsa_iter, &ria);
+	if (ria.proxim_dom != -1)
+		unit->memdomain = acpi_map_pxm_to_vm_domainid(ria.proxim_dom);
 
 	timeout = dmar_get_timeout();
 	TUNABLE_UINT64_FETCH("hw.iommu.dmar.timeout", &timeout);
@@ -424,6 +453,10 @@ dmar_attach(device_t dev)
 
 	unit->ctx_obj = vm_pager_allocate(OBJT_PHYS, NULL, IDX_TO_OFF(1 +
 	    DMAR_CTX_CNT), 0, 0, NULL);
+	if (unit->memdomain != -1) {
+		unit->ctx_obj->domain.dr_policy = DOMAINSET_PREF(
+		    unit->memdomain);
+	}
 
 	/*
 	 * Allocate and load the root entry table pointer.  Enable the
diff --git a/sys/x86/iommu/intel_idpgtbl.c b/sys/x86/iommu/intel_idpgtbl.c
index 6f66106822fe..b133dc875515 100644
--- a/sys/x86/iommu/intel_idpgtbl.c
+++ b/sys/x86/iommu/intel_idpgtbl.c
@@ -30,12 +30,13 @@
 
 #include <sys/param.h>
 #include <sys/systm.h>
-#include <sys/malloc.h>
+#include <sys/domainset.h>
 #include <sys/bus.h>
 #include <sys/interrupt.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
+#include <sys/malloc.h>
 #include <sys/memdesc.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
@@ -231,6 +232,10 @@ dmar_get_idmap_pgtbl(struct dmar_domain *domain, iommu_gaddr_t maxaddr)
 	tbl->maxaddr = maxaddr;
 	tbl->pgtbl_obj = vm_pager_allocate(OBJT_PHYS, NULL,
 	    IDX_TO_OFF(pglvl_max_pages(tbl->pglvl)), 0, 0, NULL);
+	/*
+	 * Do not set a NUMA policy here: the identity table might be
+	 * used by more than one unit.
+	 */
 	VM_OBJECT_WLOCK(tbl->pgtbl_obj);
 	dmar_idmap_nextlvl(tbl, 0, 0, 0);
 	VM_OBJECT_WUNLOCK(tbl->pgtbl_obj);
@@ -675,21 +680,27 @@ int
 dmar_domain_alloc_pgtbl(struct dmar_domain *domain)
 {
 	vm_page_t m;
+	struct dmar_unit *unit;
 
 	KASSERT(domain->pgtbl_obj == NULL,
 	    ("already initialized %p", domain));
 
+	unit = domain->dmar;
 	domain->pgtbl_obj = vm_pager_allocate(OBJT_PHYS, NULL,
 	    IDX_TO_OFF(pglvl_max_pages(domain->pglvl)), 0, 0, NULL);
+	if (unit->memdomain != -1) {
+		domain->pgtbl_obj->domain.dr_policy = DOMAINSET_PREF(
+		    unit->memdomain);
+	}
 	DMAR_DOMAIN_PGLOCK(domain);
 	m = iommu_pgalloc(domain->pgtbl_obj, 0, IOMMU_PGF_WAITOK |
 	    IOMMU_PGF_ZERO | IOMMU_PGF_OBJL);
 	/* No implicit free of the top level page table page. */
 	vm_page_wire(m);
 	DMAR_DOMAIN_PGUNLOCK(domain);
-	DMAR_LOCK(domain->dmar);
+	DMAR_LOCK(unit);
 	domain->iodom.flags |= IOMMU_DOMAIN_PGTBL_INITED;
-	DMAR_UNLOCK(domain->dmar);
+	DMAR_UNLOCK(unit);
 	return (0);
 }
 
diff --git a/sys/x86/iommu/intel_intrmap.c b/sys/x86/iommu/intel_intrmap.c
index 87bb7c791dd0..06e41523624b 100644
--- a/sys/x86/iommu/intel_intrmap.c
+++ b/sys/x86/iommu/intel_intrmap.c
@@ -29,6 +29,7 @@
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
+#include <sys/domainset.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
@@ -337,10 +338,20 @@ dmar_init_irt(struct dmar_unit *unit)
 		return (0);
 	}
 	unit->irte_cnt = roundup_pow_of_two(num_io_irqs);
-	unit->irt = kmem_alloc_contig(unit->irte_cnt * sizeof(dmar_irte_t),
-	    M_ZERO | M_WAITOK, 0, iommu_high, PAGE_SIZE, 0,
-	    DMAR_IS_COHERENT(unit) ?
-	    VM_MEMATTR_DEFAULT : VM_MEMATTR_UNCACHEABLE);
+	if (unit->memdomain == -1) {
+		unit->irt = kmem_alloc_contig(
+		    unit->irte_cnt * sizeof(dmar_irte_t),
+		    M_ZERO | M_WAITOK, 0, iommu_high, PAGE_SIZE, 0,
+		    DMAR_IS_COHERENT(unit) ?
+		    VM_MEMATTR_DEFAULT : VM_MEMATTR_UNCACHEABLE);
+	} else {
+		unit->irt = kmem_alloc_contig_domainset(
+		    DOMAINSET_PREF(unit->memdomain),
+		    unit->irte_cnt * sizeof(dmar_irte_t),
+		    M_ZERO | M_WAITOK, 0, iommu_high, PAGE_SIZE, 0,
+		    DMAR_IS_COHERENT(unit) ?
+		    VM_MEMATTR_DEFAULT : VM_MEMATTR_UNCACHEABLE);
+	}
 	if (unit->irt == NULL)
 		return (ENOMEM);
 	unit->irt_phys = pmap_kextract((vm_offset_t)unit->irt);