git: 2eeb95ccf39a - main - bhyve: make most of the iommu_ops interfaces return error

From: Konstantin Belousov <kib_at_FreeBSD.org>
Date: Sun, 06 Apr 2025 03:25:55 UTC
The branch main has been updated by kib:

URL: https://cgit.FreeBSD.org/src/commit/?id=2eeb95ccf39ac5d79326c4482112b6b7dd5fc8b2

commit 2eeb95ccf39ac5d79326c4482112b6b7dd5fc8b2
Author:     Konstantin Belousov <kib@FreeBSD.org>
AuthorDate: 2023-12-19 15:41:32 +0000
Commit:     Konstantin Belousov <kib@FreeBSD.org>
CommitDate: 2025-04-06 03:25:38 +0000

    bhyve: make most of the iommu_ops interfaces return error
    
    and change create_mapping()/remove_mapping() to allow shorten results.
    
    Reviewed by:    markj
    Sponsored by:   The FreeBSD Foundation
    MFC after:      1 week
    Differential revision:  https://reviews.freebsd.org/D49629
---
 sys/amd64/vmm/amd/amdvi_hw.c |  39 +++++++-------
 sys/amd64/vmm/intel/vtd.c    |  28 ++++++----
 sys/amd64/vmm/io/iommu.c     | 119 +++++++++++++++++++++++++------------------
 sys/amd64/vmm/io/iommu.h     |  26 +++++-----
 sys/amd64/vmm/io/ppt.c       |  33 ++++++++----
 sys/amd64/vmm/vmm.c          |  22 ++++----
 6 files changed, 159 insertions(+), 108 deletions(-)

diff --git a/sys/amd64/vmm/amd/amdvi_hw.c b/sys/amd64/vmm/amd/amdvi_hw.c
index 87283325600c..831c31277570 100644
--- a/sys/amd64/vmm/amd/amdvi_hw.c
+++ b/sys/amd64/vmm/amd/amdvi_hw.c
@@ -1155,9 +1155,9 @@ amdvi_update_mapping(struct amdvi_domain *domain, vm_paddr_t gpa,
 	return (mapped);
 }
 
-static uint64_t
+static int
 amdvi_create_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa,
-    uint64_t len)
+    uint64_t len, uint64_t *res_len)
 {
 	struct amdvi_domain *domain;
 
@@ -1165,7 +1165,7 @@ amdvi_create_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa,
 
 	if (domain->id && !domain->ptp) {
 		printf("ptp is NULL");
-		return (-1);
+		return (EINVAL);
 	}
 
 	/*
@@ -1173,13 +1173,14 @@ amdvi_create_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa,
 	 * table set-up.
 	 */
 	if (domain->ptp)
-		return (amdvi_update_mapping(domain, gpa, hpa, len, true));
+		*res_len = amdvi_update_mapping(domain, gpa, hpa, len, true);
 	else
-		return (len);
+		*res_len = len;
+	return (0);
 }
 
-static uint64_t
-amdvi_remove_mapping(void *arg, vm_paddr_t gpa, uint64_t len)
+static int
+amdvi_remove_mapping(void *arg, vm_paddr_t gpa, uint64_t len, uint64_t *res_len)
 {
 	struct amdvi_domain *domain;
 
@@ -1189,9 +1190,10 @@ amdvi_remove_mapping(void *arg, vm_paddr_t gpa, uint64_t len)
 	 * table set-up.
 	 */
 	if (domain->ptp)
-		return (amdvi_update_mapping(domain, gpa, 0, len, false));
-	return
-	    (len);
+		*res_len = amdvi_update_mapping(domain, gpa, 0, len, false);
+	else
+		*res_len = len;
+	return (0);
 }
 
 static struct amdvi_softc *
@@ -1268,8 +1270,8 @@ amdvi_inv_device(struct amdvi_softc *softc, uint16_t devid)
 	amdvi_wait(softc);
 }
 
-static void
-amdvi_add_device(void *arg, uint16_t devid)
+static int
+amdvi_add_device(void *arg, device_t dev __unused, uint16_t devid)
 {
 	struct amdvi_domain *domain;
 	struct amdvi_softc *softc;
@@ -1282,13 +1284,14 @@ amdvi_add_device(void *arg, uint16_t devid)
 #endif
 	softc = amdvi_find_iommu(devid);
 	if (softc == NULL)
-		return;
+		return (ENXIO);
 	amdvi_set_dte(domain, softc, devid, true);
 	amdvi_inv_device(softc, devid);
+	return (0);
 }
 
-static void
-amdvi_remove_device(void *arg, uint16_t devid)
+static int
+amdvi_remove_device(void *arg, device_t dev __unused, uint16_t devid)
 {
 	struct amdvi_domain *domain;
 	struct amdvi_softc *softc;
@@ -1300,9 +1303,10 @@ amdvi_remove_device(void *arg, uint16_t devid)
 #endif
 	softc = amdvi_find_iommu(devid);
 	if (softc == NULL)
-		return;
+		return (ENXIO);
 	amdvi_set_dte(domain, softc, devid, false);
 	amdvi_inv_device(softc, devid);
+	return (0);
 }
 
 static void
@@ -1357,7 +1361,7 @@ amdvi_disable(void)
 	}
 }
 
-static void
+static int
 amdvi_invalidate_tlb(void *arg)
 {
 	struct amdvi_domain *domain;
@@ -1365,6 +1369,7 @@ amdvi_invalidate_tlb(void *arg)
 	domain = (struct amdvi_domain *)arg;
 	KASSERT(domain, ("domain is NULL"));
 	amdvi_do_inv_domain(domain->id, false);
+	return (0);
 }
 
 const struct iommu_ops iommu_ops_amd = {
diff --git a/sys/amd64/vmm/intel/vtd.c b/sys/amd64/vmm/intel/vtd.c
index 72cedeca6ec1..b56541290a9d 100644
--- a/sys/amd64/vmm/intel/vtd.c
+++ b/sys/amd64/vmm/intel/vtd.c
@@ -431,8 +431,8 @@ vtd_disable(void)
 	}
 }
 
-static void
-vtd_add_device(void *arg, uint16_t rid)
+static int
+vtd_add_device(void *arg, device_t dev __unused, uint16_t rid)
 {
 	int idx;
 	uint64_t *ctxp;
@@ -475,10 +475,11 @@ vtd_add_device(void *arg, uint16_t rid)
 	 * 'Not Present' entries are not cached in either the Context Cache
 	 * or in the IOTLB, so there is no need to invalidate either of them.
 	 */
+	return (0);
 }
 
-static void
-vtd_remove_device(void *arg, uint16_t rid)
+static int
+vtd_remove_device(void *arg, device_t dev __unused, uint16_t rid)
 {
 	int i, idx;
 	uint64_t *ctxp;
@@ -506,6 +507,7 @@ vtd_remove_device(void *arg, uint16_t rid)
 		vtd_ctx_global_invalidate(vtdmap);
 		vtd_iotlb_global_invalidate(vtdmap);
 	}
+	return (0);
 }
 
 #define	CREATE_MAPPING	0
@@ -600,21 +602,24 @@ vtd_update_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len,
 	return (1UL << ptpshift);
 }
 
-static uint64_t
-vtd_create_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len)
+static int
+vtd_create_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len,
+    uint64_t *res_len)
 {
 
-	return (vtd_update_mapping(arg, gpa, hpa, len, CREATE_MAPPING));
+	*res_len = vtd_update_mapping(arg, gpa, hpa, len, CREATE_MAPPING);
+	return (0);
 }
 
-static uint64_t
-vtd_remove_mapping(void *arg, vm_paddr_t gpa, uint64_t len)
+static int
+vtd_remove_mapping(void *arg, vm_paddr_t gpa, uint64_t len, uint64_t *res_len)
 {
 
-	return (vtd_update_mapping(arg, gpa, 0, len, REMOVE_MAPPING));
+	*res_len = vtd_update_mapping(arg, gpa, 0, len, REMOVE_MAPPING);
+	return (0);
 }
 
-static void
+static int
 vtd_invalidate_tlb(void *dom)
 {
 	int i;
@@ -628,6 +633,7 @@ vtd_invalidate_tlb(void *dom)
 		vtdmap = vtdmaps[i];
 		vtd_iotlb_global_invalidate(vtdmap);
 	}
+	return (0);
 }
 
 static void *
diff --git a/sys/amd64/vmm/io/iommu.c b/sys/amd64/vmm/io/iommu.c
index dc4a0de94bb6..bd86ac37a83e 100644
--- a/sys/amd64/vmm/io/iommu.c
+++ b/sys/amd64/vmm/io/iommu.c
@@ -58,6 +58,8 @@ static const struct iommu_ops *ops;
 static void *host_domain;
 static eventhandler_tag add_tag, delete_tag;
 
+static void iommu_cleanup_int(bool iommu_disable);
+
 static __inline int
 IOMMU_INIT(void)
 {
@@ -92,48 +94,51 @@ IOMMU_DESTROY_DOMAIN(void *dom)
 		(*ops->destroy_domain)(dom);
 }
 
-static __inline uint64_t
-IOMMU_CREATE_MAPPING(void *domain, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len)
+static __inline int
+IOMMU_CREATE_MAPPING(void *domain, vm_paddr_t gpa, vm_paddr_t hpa,
+    uint64_t len, uint64_t *res_len)
 {
 
 	if (ops != NULL && iommu_avail)
-		return ((*ops->create_mapping)(domain, gpa, hpa, len));
-	else
-		return (len);		/* XXX */
+		return ((*ops->create_mapping)(domain, gpa, hpa, len, res_len));
+	return (EOPNOTSUPP);
 }
 
 static __inline uint64_t
-IOMMU_REMOVE_MAPPING(void *domain, vm_paddr_t gpa, uint64_t len)
+IOMMU_REMOVE_MAPPING(void *domain, vm_paddr_t gpa, uint64_t len,
+    uint64_t *res_len)
 {
 
 	if (ops != NULL && iommu_avail)
-		return ((*ops->remove_mapping)(domain, gpa, len));
-	else
-		return (len);		/* XXX */
+		return ((*ops->remove_mapping)(domain, gpa, len, res_len));
+	return (EOPNOTSUPP);
 }
 
-static __inline void
-IOMMU_ADD_DEVICE(void *domain, uint16_t rid)
+static __inline int
+IOMMU_ADD_DEVICE(void *domain, device_t dev, uint16_t rid)
 {
 
 	if (ops != NULL && iommu_avail)
-		(*ops->add_device)(domain, rid);
+		return ((*ops->add_device)(domain, dev, rid));
+	return (EOPNOTSUPP);
 }
 
-static __inline void
-IOMMU_REMOVE_DEVICE(void *domain, uint16_t rid)
+static __inline int
+IOMMU_REMOVE_DEVICE(void *domain, device_t dev, uint16_t rid)
 {
 
 	if (ops != NULL && iommu_avail)
-		(*ops->remove_device)(domain, rid);
+		return ((*ops->remove_device)(domain, dev, rid));
+	return (EOPNOTSUPP);
 }
 
-static __inline void
+static __inline int
 IOMMU_INVALIDATE_TLB(void *domain)
 {
 
 	if (ops != NULL && iommu_avail)
-		(*ops->invalidate_tlb)(domain);
+		return ((*ops->invalidate_tlb)(domain));
+	return (0);
 }
 
 static __inline void
@@ -157,14 +162,14 @@ iommu_pci_add(void *arg, device_t dev)
 {
 
 	/* Add new devices to the host domain. */
-	iommu_add_device(host_domain, pci_get_rid(dev));
+	iommu_add_device(host_domain, dev, pci_get_rid(dev));
 }
 
 static void
 iommu_pci_delete(void *arg, device_t dev)
 {
 
-	iommu_remove_device(host_domain, pci_get_rid(dev));
+	iommu_remove_device(host_domain, dev, pci_get_rid(dev));
 }
 
 static void
@@ -230,17 +235,20 @@ iommu_init(void)
 				 * Everything else belongs to the host
 				 * domain.
 				 */
-				iommu_add_device(host_domain,
+				error = iommu_add_device(host_domain, dev,
 				    pci_get_rid(dev));
+				if (error != 0) {
+					iommu_cleanup_int(false);
+					return;
+				}
 			}
 		}
 	}
 	IOMMU_ENABLE();
-
 }
 
-void
-iommu_cleanup(void)
+static void
+iommu_cleanup_int(bool iommu_disable)
 {
 
 	if (add_tag != NULL) {
@@ -251,12 +259,19 @@ iommu_cleanup(void)
 		EVENTHANDLER_DEREGISTER(pci_delete_device, delete_tag);
 		delete_tag = NULL;
 	}
-	IOMMU_DISABLE();
+	if (iommu_disable)
+		IOMMU_DISABLE();
 	IOMMU_DESTROY_DOMAIN(host_domain);
 	host_domain = NULL;
 	IOMMU_CLEANUP();
 }
 
+void
+iommu_cleanup(void)
+{
+	iommu_cleanup_int(true);
+}
+
 void *
 iommu_create_domain(vm_paddr_t maxaddr)
 {
@@ -280,33 +295,39 @@ iommu_destroy_domain(void *dom)
 	IOMMU_DESTROY_DOMAIN(dom);
 }
 
-void
+int
 iommu_create_mapping(void *dom, vm_paddr_t gpa, vm_paddr_t hpa, size_t len)
 {
 	uint64_t mapped, remaining;
-
-	remaining = len;
-
-	while (remaining > 0) {
-		mapped = IOMMU_CREATE_MAPPING(dom, gpa, hpa, remaining);
-		gpa += mapped;
-		hpa += mapped;
-		remaining -= mapped;
+	int error;
+
+	for (remaining = len; remaining > 0; gpa += mapped, hpa += mapped,
+	    remaining -= mapped) {
+		error = IOMMU_CREATE_MAPPING(dom, gpa, hpa, remaining,
+		    &mapped);
+		if (error != 0) {
+			/* XXXKIB rollback */
+			return (error);
+		}
 	}
+	return (0);
 }
 
-void
+int
 iommu_remove_mapping(void *dom, vm_paddr_t gpa, size_t len)
 {
 	uint64_t unmapped, remaining;
-
-	remaining = len;
-
-	while (remaining > 0) {
-		unmapped = IOMMU_REMOVE_MAPPING(dom, gpa, remaining);
-		gpa += unmapped;
-		remaining -= unmapped;
+	int error;
+
+	for (remaining = len; remaining > 0; gpa += unmapped,
+	    remaining -= unmapped) {
+		error = IOMMU_REMOVE_MAPPING(dom, gpa, remaining, &unmapped);
+		if (error != 0) {
+			/* XXXKIB ? */
+			return (error);
+		}
 	}
+	return (0);
 }
 
 void *
@@ -316,23 +337,23 @@ iommu_host_domain(void)
 	return (host_domain);
 }
 
-void
-iommu_add_device(void *dom, uint16_t rid)
+int
+iommu_add_device(void *dom, device_t dev, uint16_t rid)
 {
 
-	IOMMU_ADD_DEVICE(dom, rid);
+	return (IOMMU_ADD_DEVICE(dom, dev, rid));
 }
 
-void
-iommu_remove_device(void *dom, uint16_t rid)
+int
+iommu_remove_device(void *dom, device_t dev, uint16_t rid)
 {
 
-	IOMMU_REMOVE_DEVICE(dom, rid);
+	return (IOMMU_REMOVE_DEVICE(dom, dev, rid));
 }
 
-void
+int
 iommu_invalidate_tlb(void *domain)
 {
 
-	IOMMU_INVALIDATE_TLB(domain);
+	return (IOMMU_INVALIDATE_TLB(domain));
 }
diff --git a/sys/amd64/vmm/io/iommu.h b/sys/amd64/vmm/io/iommu.h
index c2891b62b5f2..5294a9d92a6b 100644
--- a/sys/amd64/vmm/io/iommu.h
+++ b/sys/amd64/vmm/io/iommu.h
@@ -35,13 +35,13 @@ typedef void (*iommu_enable_func_t)(void);
 typedef void (*iommu_disable_func_t)(void);
 typedef void *(*iommu_create_domain_t)(vm_paddr_t maxaddr);
 typedef void (*iommu_destroy_domain_t)(void *domain);
-typedef uint64_t (*iommu_create_mapping_t)(void *domain, vm_paddr_t gpa,
-					   vm_paddr_t hpa, uint64_t len);
-typedef uint64_t (*iommu_remove_mapping_t)(void *domain, vm_paddr_t gpa,
-					   uint64_t len);
-typedef void (*iommu_add_device_t)(void *domain, uint16_t rid);
-typedef void (*iommu_remove_device_t)(void *dom, uint16_t rid);
-typedef void (*iommu_invalidate_tlb_t)(void *dom);
+typedef int (*iommu_create_mapping_t)(void *domain, vm_paddr_t gpa,
+    vm_paddr_t hpa, uint64_t len, uint64_t *res_len);
+typedef int (*iommu_remove_mapping_t)(void *domain, vm_paddr_t gpa,
+    uint64_t len, uint64_t *res_len);
+typedef int (*iommu_add_device_t)(void *domain, device_t dev, uint16_t rid);
+typedef int (*iommu_remove_device_t)(void *dom, device_t dev, uint16_t rid);
+typedef int (*iommu_invalidate_tlb_t)(void *dom);
 
 struct iommu_ops {
 	iommu_init_func_t	init;		/* module wide */
@@ -65,10 +65,10 @@ void	iommu_cleanup(void);
 void	*iommu_host_domain(void);
 void	*iommu_create_domain(vm_paddr_t maxaddr);
 void	iommu_destroy_domain(void *dom);
-void	iommu_create_mapping(void *dom, vm_paddr_t gpa, vm_paddr_t hpa,
-			     size_t len);
-void	iommu_remove_mapping(void *dom, vm_paddr_t gpa, size_t len);
-void	iommu_add_device(void *dom, uint16_t rid);
-void	iommu_remove_device(void *dom, uint16_t rid);
-void	iommu_invalidate_tlb(void *domain);
+int	iommu_create_mapping(void *dom, vm_paddr_t gpa, vm_paddr_t hpa,
+	    size_t len);
+int	iommu_remove_mapping(void *dom, vm_paddr_t gpa, size_t len);
+int	iommu_add_device(void *dom, device_t dev, uint16_t rid);
+int	iommu_remove_device(void *dom, device_t dev, uint16_t rid);
+int	iommu_invalidate_tlb(void *domain);
 #endif
diff --git a/sys/amd64/vmm/io/ppt.c b/sys/amd64/vmm/io/ppt.c
index 3b043c64fbde..c3b2b57da988 100644
--- a/sys/amd64/vmm/io/ppt.c
+++ b/sys/amd64/vmm/io/ppt.c
@@ -151,14 +151,19 @@ static int
 ppt_attach(device_t dev)
 {
 	struct pptdev *ppt;
-	uint16_t cmd;
+	uint16_t cmd, cmd1;
+	int error;
 
 	ppt = device_get_softc(dev);
 
-	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
+	cmd1 = cmd = pci_read_config(dev, PCIR_COMMAND, 2);
 	cmd &= ~(PCIM_CMD_PORTEN | PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);
 	pci_write_config(dev, PCIR_COMMAND, cmd, 2);
-	iommu_remove_device(iommu_host_domain(), pci_get_rid(dev));
+	error = iommu_remove_device(iommu_host_domain(), dev, pci_get_rid(dev));
+	if (error != 0) {
+		pci_write_config(dev, PCIR_COMMAND, cmd1, 2);
+		return (error);
+	}
 	num_pptdevs++;
 	TAILQ_INSERT_TAIL(&pptdev_list, ppt, next);
 	ppt->dev = dev;
@@ -173,17 +178,23 @@ static int
 ppt_detach(device_t dev)
 {
 	struct pptdev *ppt;
+	int error;
 
 	ppt = device_get_softc(dev);
 
 	if (ppt->vm != NULL)
 		return (EBUSY);
+	if (iommu_host_domain() != NULL) {
+		error = iommu_add_device(iommu_host_domain(), dev,
+		    pci_get_rid(dev));
+	} else {
+		error = 0;
+	}
+	if (error != 0)
+		return (error);
 	num_pptdevs--;
 	TAILQ_REMOVE(&pptdev_list, ppt, next);
 
-	if (iommu_host_domain() != NULL)
-		iommu_add_device(iommu_host_domain(), pci_get_rid(dev));
-
 	return (0);
 }
 
@@ -410,8 +421,11 @@ ppt_assign_device(struct vm *vm, int bus, int slot, int func)
 	pci_save_state(ppt->dev);
 	ppt_pci_reset(ppt->dev);
 	pci_restore_state(ppt->dev);
+	error = iommu_add_device(vm_iommu_domain(vm), ppt->dev,
+	    pci_get_rid(ppt->dev));
+	if (error != 0)
+		return (error);
 	ppt->vm = vm;
-	iommu_add_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev));
 	cmd = pci_read_config(ppt->dev, PCIR_COMMAND, 2);
 	cmd |= PCIM_CMD_BUSMASTEREN | ppt_bar_enables(ppt);
 	pci_write_config(ppt->dev, PCIR_COMMAND, cmd, 2);
@@ -438,9 +452,10 @@ ppt_unassign_device(struct vm *vm, int bus, int slot, int func)
 	ppt_unmap_all_mmio(vm, ppt);
 	ppt_teardown_msi(ppt);
 	ppt_teardown_msix(ppt);
-	iommu_remove_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev));
+	error = iommu_remove_device(vm_iommu_domain(vm), ppt->dev,
+	    pci_get_rid(ppt->dev));
 	ppt->vm = NULL;
-	return (0);
+	return (error);
 }
 
 int
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index 1d410835be88..bd703f63b557 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -745,12 +745,12 @@ vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
 	return (0);
 }
 
-static void
+static int
 vm_iommu_map(struct vm *vm)
 {
 	vm_paddr_t gpa, hpa;
 	struct vm_mem_map *mm;
-	int i;
+	int error, i;
 
 	sx_assert(&vm->mem.mem_segs_lock, SX_LOCKED);
 
@@ -789,15 +789,16 @@ vm_iommu_map(struct vm *vm)
 		}
 	}
 
-	iommu_invalidate_tlb(iommu_host_domain());
+	error = iommu_invalidate_tlb(iommu_host_domain());
+	return (error);
 }
 
-static void
+static int
 vm_iommu_unmap(struct vm *vm)
 {
 	vm_paddr_t gpa;
 	struct vm_mem_map *mm;
-	int i;
+	int error, i;
 
 	sx_assert(&vm->mem.mem_segs_lock, SX_LOCKED);
 
@@ -826,7 +827,8 @@ vm_iommu_unmap(struct vm *vm)
 	 * Invalidate the cached translations associated with the domain
 	 * from which pages were removed.
 	 */
-	iommu_invalidate_tlb(vm->iommu);
+	error = iommu_invalidate_tlb(vm->iommu);
+	return (error);
 }
 
 int
@@ -839,9 +841,9 @@ vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func)
 		return (error);
 
 	if (ppt_assigned_devices(vm) == 0)
-		vm_iommu_unmap(vm);
+		error = vm_iommu_unmap(vm);
 
-	return (0);
+	return (error);
 }
 
 int
@@ -858,10 +860,12 @@ vm_assign_pptdev(struct vm *vm, int bus, int slot, int func)
 		vm->iommu = iommu_create_domain(maxaddr);
 		if (vm->iommu == NULL)
 			return (ENXIO);
-		vm_iommu_map(vm);
 	}
 
 	error = ppt_assign_device(vm, bus, slot, func);
+	if (error != 0)
+		return (error);
+	error = vm_iommu_map(vm);
 	return (error);
 }