git: ad794e6d7d02 - main - x86 iommu: move DMAR-independent parts of the qi code into common

From: Konstantin Belousov <kib@FreeBSD.org>
Date: Wed, 04 Sep 2024 21:50:41 UTC
The branch main has been updated by kib:

URL: https://cgit.FreeBSD.org/src/commit/?id=ad794e6d7d02a11b01e721859e096efeb258a4d4

commit ad794e6d7d02a11b01e721859e096efeb258a4d4
Author:     Konstantin Belousov <kib@FreeBSD.org>
AuthorDate: 2024-06-06 01:16:36 +0000
Commit:     Konstantin Belousov <kib@FreeBSD.org>
CommitDate: 2024-09-04 21:50:18 +0000

    x86 iommu: move DMAR-independent parts of the qi code into common
    
    Sponsored by:   Advanced Micro Devices (AMD)
    Sponsored by:   The FreeBSD Foundation
    MFC after:      1 week
---
 sys/x86/iommu/intel_ctx.c   |   7 +-
 sys/x86/iommu/intel_dmar.h  |  42 +-----
 sys/x86/iommu/intel_drv.c   |  26 ++--
 sys/x86/iommu/intel_qi.c    | 330 ++++++++++++--------------------------------
 sys/x86/iommu/iommu_utils.c | 234 ++++++++++++++++++++++++++++++-
 sys/x86/iommu/x86_iommu.h   |  72 ++++++++++
 6 files changed, 413 insertions(+), 298 deletions(-)
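
The change splits the queued-invalidation (QI) machinery in two: the hardware-independent half moves into iommu_utils.c and operates on a new struct x86_unit_common, while the DMAR driver keeps only the register-level pieces and exports them through function pointers in struct x86_iommu (get_x86_common, qi_ensure, qi_emit_wait_descr, qi_advance_tail, qi_invalidate_emit). The following is a minimal userspace sketch of that indirection pattern, not part of the commit; all toy_* names are invented for illustration.

/*
 * Illustrative sketch, not part of the commit: a userspace model of the
 * indirection the patch introduces.  Hardware-independent code keeps the
 * shared queue state in a "common" struct and reaches the hardware driver
 * only through function pointers, the way iommu_utils.c uses
 * x86_iommu->get_x86_common() and x86_iommu->qi_advance_tail().
 * All toy_* names are invented.
 */
#include <stdint.h>
#include <stdio.h>

struct toy_unit_common {		/* stands in for x86_unit_common */
	uint32_t inv_queue_tail;
};

struct toy_unit {			/* stands in for dmar_unit */
	struct toy_unit_common common;
	const char *name;
};

struct toy_iommu_ops {			/* stands in for struct x86_iommu */
	struct toy_unit_common *(*get_common)(struct toy_unit *);
	void (*advance_tail)(struct toy_unit *);
};

/* Hardware-specific half: knows how to poke the (pretend) tail register. */
static struct toy_unit_common *
toy_get_common(struct toy_unit *u)
{
	return (&u->common);
}

static void
toy_advance_tail(struct toy_unit *u)
{
	printf("%s: tail register <- %u\n", u->name,
	    (unsigned)u->common.inv_queue_tail);
}

static const struct toy_iommu_ops toy_ops = {
	.get_common = toy_get_common,
	.advance_tail = toy_advance_tail,
};

/* Hardware-independent half: touches only the common state and the hooks. */
static void
common_queue_descriptor(const struct toy_iommu_ops *ops, struct toy_unit *u)
{
	struct toy_unit_common *c = ops->get_common(u);

	c->inv_queue_tail += 16;	/* pretend one descriptor was emitted */
	ops->advance_tail(u);
}

int
main(void)
{
	struct toy_unit u = { .name = "toyiommu0" };

	common_queue_descriptor(&toy_ops, &u);
	return (0);
}

Keeping the per-unit state behind get_x86_common() and the register writes behind the other hooks is what lets the common routines in iommu_utils.c drive either a DMAR backend or, presumably, a later AMD IOMMU one.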

diff --git a/sys/x86/iommu/intel_ctx.c b/sys/x86/iommu/intel_ctx.c
index a3ff35dc527e..03ef196c4cb0 100644
--- a/sys/x86/iommu/intel_ctx.c
+++ b/sys/x86/iommu/intel_ctx.c
@@ -887,10 +887,11 @@ dmar_domain_unload_entry(struct iommu_map_entry *entry, bool free,
 	if (unit->qi_enabled) {
 		if (free) {
 			DMAR_LOCK(unit);
-			dmar_qi_invalidate_locked(domain, entry, true);
+			iommu_qi_invalidate_locked(&domain->iodom, entry,
+			    true);
 			DMAR_UNLOCK(unit);
 		} else {
-			dmar_qi_invalidate_sync(domain, entry->start,
+			iommu_qi_invalidate_sync(&domain->iodom, entry->start,
 			    entry->end - entry->start, cansleep);
 			dmar_domain_free_entry(entry, false);
 		}
@@ -943,7 +944,7 @@ dmar_domain_unload(struct iommu_domain *iodom,
 	DMAR_LOCK(unit);
 	while ((entry = TAILQ_FIRST(entries)) != NULL) {
 		TAILQ_REMOVE(entries, entry, dmamap_link);
-		dmar_qi_invalidate_locked(domain, entry,
+		iommu_qi_invalidate_locked(&domain->iodom, entry,
 		    dmar_domain_unload_emit_wait(domain, entry));
 	}
 	DMAR_UNLOCK(unit);
diff --git a/sys/x86/iommu/intel_dmar.h b/sys/x86/iommu/intel_dmar.h
index 0ede955e12b9..8a815d5cfca6 100644
--- a/sys/x86/iommu/intel_dmar.h
+++ b/sys/x86/iommu/intel_dmar.h
@@ -123,6 +123,7 @@ struct dmar_msi_data {
 
 struct dmar_unit {
 	struct iommu_unit iommu;
+	struct x86_unit_common x86c;
 	uint16_t segment;
 	uint64_t base;
 
@@ -155,17 +156,6 @@ struct dmar_unit {
 
 	/* QI */
 	int qi_enabled;
-	char *inv_queue;
-	vm_size_t inv_queue_size;
-	uint32_t inv_queue_avail;
-	uint32_t inv_queue_tail;
-	volatile uint32_t inv_waitd_seq_hw; /* hw writes there on wait
-					       descr completion */
-	uint64_t inv_waitd_seq_hw_phys;
-	uint32_t inv_waitd_seq; /* next sequence number to use for wait descr */
-	u_int inv_waitd_gen;	/* seq number generation AKA seq overflows */
-	u_int inv_seq_waiters;	/* count of waiters for seq */
-	u_int inv_queue_full;	/* informational counter */
 
 	/* IR */
 	int ir_enabled;
@@ -173,36 +163,6 @@ struct dmar_unit {
 	dmar_irte_t *irt;
 	u_int irte_cnt;
 	vmem_t *irtids;
-
-	/*
-	 * Delayed freeing of map entries queue processing:
-	 *
-	 * tlb_flush_head and tlb_flush_tail are used to implement a FIFO
-	 * queue that supports concurrent dequeues and enqueues.  However,
-	 * there can only be a single dequeuer (accessing tlb_flush_head) and
-	 * a single enqueuer (accessing tlb_flush_tail) at a time.  Since the
-	 * unit's qi_task is the only dequeuer, it can access tlb_flush_head
-	 * without any locking.  In contrast, there may be multiple enqueuers,
-	 * so the enqueuers acquire the iommu unit lock to serialize their
-	 * accesses to tlb_flush_tail.
-	 *
-	 * In this FIFO queue implementation, the key to enabling concurrent
-	 * dequeues and enqueues is that the dequeuer never needs to access
-	 * tlb_flush_tail and the enqueuer never needs to access
-	 * tlb_flush_head.  In particular, tlb_flush_head and tlb_flush_tail
-	 * are never NULL, so neither a dequeuer nor an enqueuer ever needs to
-	 * update both.  Instead, tlb_flush_head always points to a "zombie"
-	 * struct, which previously held the last dequeued item.  Thus, the
-	 * zombie's next field actually points to the struct holding the first
-	 * item in the queue.  When an item is dequeued, the current zombie is
-	 * finally freed, and the struct that held the just dequeued item
-	 * becomes the new zombie.  When the queue is empty, tlb_flush_tail
-	 * also points to the zombie.
-	 */
-	struct iommu_map_entry *tlb_flush_head;
-	struct iommu_map_entry *tlb_flush_tail;
-	struct task qi_task;
-	struct taskqueue *qi_taskqueue;
 };
 
 #define	DMAR_LOCK(dmar)		mtx_lock(&DMAR2IOMMU(dmar)->lock)
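
The comment deleted just above documents the deferred map-entry freeing FIFO: the head always points at an already consumed "zombie" node, the single dequeuer (the unit's qi task) follows the next pointers without locking, and enqueuers update the tail under the unit lock. That state now lives in struct x86_unit_common and is drained by iommu_qi_drain_tlb_flush() further down. A minimal userspace model of the same single-dequeuer FIFO shape, not part of the commit and with invented names, could look like this:

/*
 * Illustrative sketch, not part of the commit: a userspace model of the
 * "zombie head" FIFO described in the comment deleted above.  The single
 * dequeuer follows only the head, enqueuers (serialized by a lock in the
 * kernel) touch only the tail, and the head always points at an already
 * consumed node whose next pointer is the real first element.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
	_Atomic(struct node *) next;
	int payload;
};

static struct node *head;	/* only the single dequeuer touches this */
static struct node *tail;	/* only (locked) enqueuers touch this */

static struct node *
new_node(int payload)
{
	struct node *n = malloc(sizeof(*n));

	atomic_init(&n->next, NULL);
	n->payload = payload;
	return (n);
}

static void
fifo_init(void)
{
	head = tail = new_node(0);	/* the initial zombie */
}

/* Would run with the unit lock held in the kernel. */
static void
fifo_enqueue(int payload)
{
	struct node *n = new_node(payload);

	/* Publish the node only after it is fully initialized. */
	atomic_store_explicit(&tail->next, n, memory_order_release);
	tail = n;
}

/* Single dequeuer: frees the old zombie; the dequeued node becomes it. */
static int
fifo_dequeue(int *payload)
{
	struct node *n;

	n = atomic_load_explicit(&head->next, memory_order_acquire);
	if (n == NULL)
		return (0);
	*payload = n->payload;
	free(head);
	head = n;
	return (1);
}

int
main(void)
{
	int v;

	fifo_init();
	fifo_enqueue(1);
	fifo_enqueue(2);
	while (fifo_dequeue(&v))
		printf("dequeued %d\n", v);
	return (0);
}
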
diff --git a/sys/x86/iommu/intel_drv.c b/sys/x86/iommu/intel_drv.c
index 79350358cced..9fa1b3f98dc6 100644
--- a/sys/x86/iommu/intel_drv.c
+++ b/sys/x86/iommu/intel_drv.c
@@ -1303,19 +1303,19 @@ dmar_print_one(int idx, bool show_domains, bool show_mappings)
 			    "size 0x%jx\n"
 		    "  head 0x%x tail 0x%x avail 0x%x status 0x%x ctrl 0x%x\n"
 		    "  hw compl 0x%x@%p/phys@%jx next seq 0x%x gen 0x%x\n",
-			    (uintmax_t)unit->inv_queue,
+			    (uintmax_t)unit->x86c.inv_queue,
 			    (uintmax_t)dmar_read8(unit, DMAR_IQA_REG),
-			    (uintmax_t)unit->inv_queue_size,
+			    (uintmax_t)unit->x86c.inv_queue_size,
 			    dmar_read4(unit, DMAR_IQH_REG),
 			    dmar_read4(unit, DMAR_IQT_REG),
-			    unit->inv_queue_avail,
+			    unit->x86c.inv_queue_avail,
 			    dmar_read4(unit, DMAR_ICS_REG),
 			    dmar_read4(unit, DMAR_IECTL_REG),
-			    unit->inv_waitd_seq_hw,
-			    &unit->inv_waitd_seq_hw,
-			    (uintmax_t)unit->inv_waitd_seq_hw_phys,
-			    unit->inv_waitd_seq,
-			    unit->inv_waitd_gen);
+			    unit->x86c.inv_waitd_seq_hw,
+			    &unit->x86c.inv_waitd_seq_hw,
+			    (uintmax_t)unit->x86c.inv_waitd_seq_hw_phys,
+			    unit->x86c.inv_waitd_seq,
+			    unit->x86c.inv_waitd_gen);
 		} else {
 			db_printf("qi is disabled\n");
 		}
@@ -1368,7 +1368,17 @@ dmar_find_method(device_t dev, bool verbose)
 	return (&dmar->iommu);
 }
 
+static struct x86_unit_common *
+dmar_get_x86_common(struct iommu_unit *unit)
+{
+	struct dmar_unit *dmar;
+
+	dmar = IOMMU2DMAR(unit);
+	return (&dmar->x86c);
+}
+
 static struct x86_iommu dmar_x86_iommu = {
+	.get_x86_common = dmar_get_x86_common,
 	.domain_unload_entry = dmar_domain_unload_entry,
 	.domain_unload = dmar_domain_unload,
 	.get_ctx = dmar_get_ctx,
diff --git a/sys/x86/iommu/intel_qi.c b/sys/x86/iommu/intel_qi.c
index 590cbac9bcbd..a94fbb54e7f7 100644
--- a/sys/x86/iommu/intel_qi.c
+++ b/sys/x86/iommu/intel_qi.c
@@ -58,17 +58,6 @@
 #include <x86/iommu/x86_iommu.h>
 #include <x86/iommu/intel_dmar.h>
 
-static bool
-dmar_qi_seq_processed(const struct dmar_unit *unit,
-    const struct iommu_qi_genseq *pseq)
-{
-	u_int gen;
-
-	gen = unit->inv_waitd_gen;
-	return (pseq->gen < gen ||
-	    (pseq->gen == gen && pseq->seq <= unit->inv_waitd_seq_hw));
-}
-
 static int
 dmar_enable_qi(struct dmar_unit *unit)
 {
@@ -96,32 +85,36 @@ dmar_disable_qi(struct dmar_unit *unit)
 }
 
 static void
-dmar_qi_advance_tail(struct dmar_unit *unit)
+dmar_qi_advance_tail(struct iommu_unit *iommu)
 {
+	struct dmar_unit *unit;
 
+	unit = IOMMU2DMAR(iommu);
 	DMAR_ASSERT_LOCKED(unit);
-	dmar_write4(unit, DMAR_IQT_REG, unit->inv_queue_tail);
+	dmar_write4(unit, DMAR_IQT_REG, unit->x86c.inv_queue_tail);
 }
 
 static void
-dmar_qi_ensure(struct dmar_unit *unit, int descr_count)
+dmar_qi_ensure(struct iommu_unit *iommu, int descr_count)
 {
+	struct dmar_unit *unit;
 	uint32_t head;
 	int bytes;
 
+	unit = IOMMU2DMAR(iommu);
 	DMAR_ASSERT_LOCKED(unit);
 	bytes = descr_count << DMAR_IQ_DESCR_SZ_SHIFT;
 	for (;;) {
-		if (bytes <= unit->inv_queue_avail)
+		if (bytes <= unit->x86c.inv_queue_avail)
 			break;
 		/* refill */
 		head = dmar_read4(unit, DMAR_IQH_REG);
 		head &= DMAR_IQH_MASK;
-		unit->inv_queue_avail = head - unit->inv_queue_tail -
+		unit->x86c.inv_queue_avail = head - unit->x86c.inv_queue_tail -
 		    DMAR_IQ_DESCR_SZ;
-		if (head <= unit->inv_queue_tail)
-			unit->inv_queue_avail += unit->inv_queue_size;
-		if (bytes <= unit->inv_queue_avail)
+		if (head <= unit->x86c.inv_queue_tail)
+			unit->x86c.inv_queue_avail += unit->x86c.inv_queue_size;
+		if (bytes <= unit->x86c.inv_queue_avail)
 			break;
 
 		/*
@@ -134,11 +127,11 @@ dmar_qi_ensure(struct dmar_unit *unit, int descr_count)
 		 * See dmar_qi_invalidate_locked() for a discussion
 		 * about data race prevention.
 		 */
-		dmar_qi_advance_tail(unit);
-		unit->inv_queue_full++;
+		dmar_qi_advance_tail(DMAR2IOMMU(unit));
+		unit->x86c.inv_queue_full++;
 		cpu_spinwait();
 	}
-	unit->inv_queue_avail -= bytes;
+	unit->x86c.inv_queue_avail -= bytes;
 }
 
 static void
@@ -146,162 +139,60 @@ dmar_qi_emit(struct dmar_unit *unit, uint64_t data1, uint64_t data2)
 {
 
 	DMAR_ASSERT_LOCKED(unit);
-	*(volatile uint64_t *)(unit->inv_queue + unit->inv_queue_tail) = data1;
-	unit->inv_queue_tail += DMAR_IQ_DESCR_SZ / 2;
-	KASSERT(unit->inv_queue_tail <= unit->inv_queue_size,
-	    ("tail overflow 0x%x 0x%jx", unit->inv_queue_tail,
-	    (uintmax_t)unit->inv_queue_size));
-	unit->inv_queue_tail &= unit->inv_queue_size - 1;
-	*(volatile uint64_t *)(unit->inv_queue + unit->inv_queue_tail) = data2;
-	unit->inv_queue_tail += DMAR_IQ_DESCR_SZ / 2;
-	KASSERT(unit->inv_queue_tail <= unit->inv_queue_size,
-	    ("tail overflow 0x%x 0x%jx", unit->inv_queue_tail,
-	    (uintmax_t)unit->inv_queue_size));
-	unit->inv_queue_tail &= unit->inv_queue_size - 1;
+	*(volatile uint64_t *)(unit->x86c.inv_queue +
+	    unit->x86c.inv_queue_tail) = data1;
+	unit->x86c.inv_queue_tail += DMAR_IQ_DESCR_SZ / 2;
+	KASSERT(unit->x86c.inv_queue_tail <= unit->x86c.inv_queue_size,
+	    ("tail overflow 0x%x 0x%jx", unit->x86c.inv_queue_tail,
+	    (uintmax_t)unit->x86c.inv_queue_size));
+	unit->x86c.inv_queue_tail &= unit->x86c.inv_queue_size - 1;
+	*(volatile uint64_t *)(unit->x86c.inv_queue +
+	    unit->x86c.inv_queue_tail) = data2;
+	unit->x86c.inv_queue_tail += DMAR_IQ_DESCR_SZ / 2;
+	KASSERT(unit->x86c.inv_queue_tail <= unit->x86c.inv_queue_size,
+	    ("tail overflow 0x%x 0x%jx", unit->x86c.inv_queue_tail,
+	    (uintmax_t)unit->x86c.inv_queue_size));
+	unit->x86c.inv_queue_tail &= unit->x86c.inv_queue_size - 1;
 }
 
 static void
-dmar_qi_emit_wait_descr(struct dmar_unit *unit, uint32_t seq, bool intr,
+dmar_qi_emit_wait_descr(struct iommu_unit *iommu, uint32_t seq, bool intr,
     bool memw, bool fence)
 {
+	struct dmar_unit *unit;
 
+	unit = IOMMU2DMAR(iommu);
 	DMAR_ASSERT_LOCKED(unit);
 	dmar_qi_emit(unit, DMAR_IQ_DESCR_WAIT_ID |
 	    (intr ? DMAR_IQ_DESCR_WAIT_IF : 0) |
 	    (memw ? DMAR_IQ_DESCR_WAIT_SW : 0) |
 	    (fence ? DMAR_IQ_DESCR_WAIT_FN : 0) |
 	    (memw ? DMAR_IQ_DESCR_WAIT_SD(seq) : 0),
-	    memw ? unit->inv_waitd_seq_hw_phys : 0);
-}
-
-static void
-dmar_qi_emit_wait_seq(struct dmar_unit *unit, struct iommu_qi_genseq *pseq,
-    bool emit_wait)
-{
-	struct iommu_qi_genseq gsec;
-	uint32_t seq;
-
-	KASSERT(pseq != NULL, ("wait descriptor with no place for seq"));
-	DMAR_ASSERT_LOCKED(unit);
-	if (unit->inv_waitd_seq == 0xffffffff) {
-		gsec.gen = unit->inv_waitd_gen;
-		gsec.seq = unit->inv_waitd_seq;
-		dmar_qi_ensure(unit, 1);
-		dmar_qi_emit_wait_descr(unit, gsec.seq, false, true, false);
-		dmar_qi_advance_tail(unit);
-		while (!dmar_qi_seq_processed(unit, &gsec))
-			cpu_spinwait();
-		unit->inv_waitd_gen++;
-		unit->inv_waitd_seq = 1;
-	}
-	seq = unit->inv_waitd_seq++;
-	pseq->gen = unit->inv_waitd_gen;
-	pseq->seq = seq;
-	if (emit_wait) {
-		dmar_qi_ensure(unit, 1);
-		dmar_qi_emit_wait_descr(unit, seq, true, true, false);
-	}
+	    memw ? unit->x86c.inv_waitd_seq_hw_phys : 0);
 }
 
-/*
- * To avoid missed wakeups, callers must increment the unit's waiters count
- * before advancing the tail past the wait descriptor.
- */
 static void
-dmar_qi_wait_for_seq(struct dmar_unit *unit, const struct iommu_qi_genseq *gseq,
-    bool nowait)
-{
-
-	DMAR_ASSERT_LOCKED(unit);
-	KASSERT(unit->inv_seq_waiters > 0, ("%s: no waiters", __func__));
-	while (!dmar_qi_seq_processed(unit, gseq)) {
-		if (cold || nowait) {
-			cpu_spinwait();
-		} else {
-			msleep(&unit->inv_seq_waiters, &unit->iommu.lock, 0,
-			    "dmarse", hz);
-		}
-	}
-	unit->inv_seq_waiters--;
-}
-
-static void
-dmar_qi_invalidate_emit(struct dmar_domain *domain, iommu_gaddr_t base,
+dmar_qi_invalidate_emit(struct iommu_domain *idomain, iommu_gaddr_t base,
     iommu_gaddr_t size, struct iommu_qi_genseq *pseq, bool emit_wait)
 {
 	struct dmar_unit *unit;
+	struct dmar_domain *domain;
 	iommu_gaddr_t isize;
 	int am;
 
+	domain = __containerof(idomain, struct dmar_domain, iodom);
 	unit = domain->dmar;
 	DMAR_ASSERT_LOCKED(unit);
 	for (; size > 0; base += isize, size -= isize) {
 		am = calc_am(unit, base, size, &isize);
-		dmar_qi_ensure(unit, 1);
+		dmar_qi_ensure(DMAR2IOMMU(unit), 1);
 		dmar_qi_emit(unit, DMAR_IQ_DESCR_IOTLB_INV |
 		    DMAR_IQ_DESCR_IOTLB_PAGE | DMAR_IQ_DESCR_IOTLB_DW |
 		    DMAR_IQ_DESCR_IOTLB_DR |
 		    DMAR_IQ_DESCR_IOTLB_DID(domain->domain),
 		    base | am);
 	}
-	dmar_qi_emit_wait_seq(unit, pseq, emit_wait);
-}
-
-/*
- * The caller must not be using the entry's dmamap_link field.
- */
-void
-dmar_qi_invalidate_locked(struct dmar_domain *domain,
-    struct iommu_map_entry *entry, bool emit_wait)
-{
-	struct dmar_unit *unit;
-
-	unit = domain->dmar;
-	DMAR_ASSERT_LOCKED(unit);
-	dmar_qi_invalidate_emit(domain, entry->start, entry->end -
-	    entry->start, &entry->gseq, emit_wait);
-
-	/*
-	 * To avoid a data race in dmar_qi_task(), the entry's gseq must be
-	 * initialized before the entry is added to the TLB flush list, and the
-	 * entry must be added to that list before the tail is advanced.  More
-	 * precisely, the tail must not be advanced past the wait descriptor
-	 * that will generate the interrupt that schedules dmar_qi_task() for
-	 * execution before the entry is added to the list.  While an earlier
-	 * call to dmar_qi_ensure() might have advanced the tail, it will not
-	 * advance it past the wait descriptor.
-	 *
-	 * See the definition of struct dmar_unit for more information on
-	 * synchronization.
-	 */
-	entry->tlb_flush_next = NULL;
-	atomic_store_rel_ptr((uintptr_t *)&unit->tlb_flush_tail->tlb_flush_next,
-	    (uintptr_t)entry);
-	unit->tlb_flush_tail = entry;
-
-	dmar_qi_advance_tail(unit);
-}
-
-void
-dmar_qi_invalidate_sync(struct dmar_domain *domain, iommu_gaddr_t base,
-    iommu_gaddr_t size, bool cansleep)
-{
-	struct dmar_unit *unit;
-	struct iommu_qi_genseq gseq;
-
-	unit = domain->dmar;
-	DMAR_LOCK(unit);
-	dmar_qi_invalidate_emit(domain, base, size, &gseq, true);
-
-	/*
-	 * To avoid a missed wakeup in dmar_qi_task(), the unit's waiters count
-	 * must be incremented before the tail is advanced.
-	 */
-	unit->inv_seq_waiters++;
-
-	dmar_qi_advance_tail(unit);
-	dmar_qi_wait_for_seq(unit, &gseq, !cansleep);
-	DMAR_UNLOCK(unit);
+	iommu_qi_emit_wait_seq(DMAR2IOMMU(unit), pseq, emit_wait);
 }
 
 void
@@ -310,13 +201,13 @@ dmar_qi_invalidate_ctx_glob_locked(struct dmar_unit *unit)
 	struct iommu_qi_genseq gseq;
 
 	DMAR_ASSERT_LOCKED(unit);
-	dmar_qi_ensure(unit, 2);
+	dmar_qi_ensure(DMAR2IOMMU(unit), 2);
 	dmar_qi_emit(unit, DMAR_IQ_DESCR_CTX_INV | DMAR_IQ_DESCR_CTX_GLOB, 0);
-	dmar_qi_emit_wait_seq(unit, &gseq, true);
+	iommu_qi_emit_wait_seq(DMAR2IOMMU(unit), &gseq, true);
 	/* See dmar_qi_invalidate_sync(). */
-	unit->inv_seq_waiters++;
-	dmar_qi_advance_tail(unit);
-	dmar_qi_wait_for_seq(unit, &gseq, false);
+	unit->x86c.inv_seq_waiters++;
+	dmar_qi_advance_tail(DMAR2IOMMU(unit));
+	iommu_qi_wait_for_seq(DMAR2IOMMU(unit), &gseq, false);
 }
 
 void
@@ -325,14 +216,14 @@ dmar_qi_invalidate_iotlb_glob_locked(struct dmar_unit *unit)
 	struct iommu_qi_genseq gseq;
 
 	DMAR_ASSERT_LOCKED(unit);
-	dmar_qi_ensure(unit, 2);
+	dmar_qi_ensure(DMAR2IOMMU(unit), 2);
 	dmar_qi_emit(unit, DMAR_IQ_DESCR_IOTLB_INV | DMAR_IQ_DESCR_IOTLB_GLOB |
 	    DMAR_IQ_DESCR_IOTLB_DW | DMAR_IQ_DESCR_IOTLB_DR, 0);
-	dmar_qi_emit_wait_seq(unit, &gseq, true);
+	iommu_qi_emit_wait_seq(DMAR2IOMMU(unit), &gseq, true);
 	/* See dmar_qi_invalidate_sync(). */
-	unit->inv_seq_waiters++;
-	dmar_qi_advance_tail(unit);
-	dmar_qi_wait_for_seq(unit, &gseq, false);
+	unit->x86c.inv_seq_waiters++;
+	dmar_qi_advance_tail(DMAR2IOMMU(unit));
+	iommu_qi_wait_for_seq(DMAR2IOMMU(unit), &gseq, false);
 }
 
 void
@@ -341,13 +232,13 @@ dmar_qi_invalidate_iec_glob(struct dmar_unit *unit)
 	struct iommu_qi_genseq gseq;
 
 	DMAR_ASSERT_LOCKED(unit);
-	dmar_qi_ensure(unit, 2);
+	dmar_qi_ensure(DMAR2IOMMU(unit), 2);
 	dmar_qi_emit(unit, DMAR_IQ_DESCR_IEC_INV, 0);
-	dmar_qi_emit_wait_seq(unit, &gseq, true);
+	iommu_qi_emit_wait_seq(DMAR2IOMMU(unit), &gseq, true);
 	/* See dmar_qi_invalidate_sync(). */
-	unit->inv_seq_waiters++;
-	dmar_qi_advance_tail(unit);
-	dmar_qi_wait_for_seq(unit, &gseq, false);
+	unit->x86c.inv_seq_waiters++;
+	dmar_qi_advance_tail(DMAR2IOMMU(unit));
+	iommu_qi_wait_for_seq(DMAR2IOMMU(unit), &gseq, false);
 }
 
 void
@@ -363,21 +254,21 @@ dmar_qi_invalidate_iec(struct dmar_unit *unit, u_int start, u_int cnt)
 	for (; cnt > 0; cnt -= c, start += c) {
 		l = ffs(start | cnt) - 1;
 		c = 1 << l;
-		dmar_qi_ensure(unit, 1);
+		dmar_qi_ensure(DMAR2IOMMU(unit), 1);
 		dmar_qi_emit(unit, DMAR_IQ_DESCR_IEC_INV |
 		    DMAR_IQ_DESCR_IEC_IDX | DMAR_IQ_DESCR_IEC_IIDX(start) |
 		    DMAR_IQ_DESCR_IEC_IM(l), 0);
 	}
-	dmar_qi_ensure(unit, 1);
-	dmar_qi_emit_wait_seq(unit, &gseq, true);
+	dmar_qi_ensure(DMAR2IOMMU(unit), 1);
+	iommu_qi_emit_wait_seq(DMAR2IOMMU(unit), &gseq, true);
 
 	/*
-	 * Since dmar_qi_wait_for_seq() will not sleep, this increment's
+	 * Since iommu_qi_wait_for_seq() will not sleep, this increment's
 	 * placement relative to advancing the tail doesn't matter.
 	 */
-	unit->inv_seq_waiters++;
+	unit->x86c.inv_seq_waiters++;
 
-	dmar_qi_advance_tail(unit);
+	dmar_qi_advance_tail(DMAR2IOMMU(unit));
 
 	/*
 	 * The caller of the function, in particular,
@@ -394,7 +285,7 @@ dmar_qi_invalidate_iec(struct dmar_unit *unit, u_int start, u_int cnt)
 	 * queue is processed, which includes requests possibly issued
 	 * before our request.
 	 */
-	dmar_qi_wait_for_seq(unit, &gseq, true);
+	iommu_qi_wait_for_seq(DMAR2IOMMU(unit), &gseq, true);
 }
 
 int
@@ -405,38 +296,18 @@ dmar_qi_intr(void *arg)
 	unit = arg;
 	KASSERT(unit->qi_enabled, ("dmar%d: QI is not enabled",
 	    unit->iommu.unit));
-	taskqueue_enqueue(unit->qi_taskqueue, &unit->qi_task);
+	taskqueue_enqueue(unit->x86c.qi_taskqueue, &unit->x86c.qi_task);
 	return (FILTER_HANDLED);
 }
 
-static void
-dmar_qi_drain_tlb_flush(struct dmar_unit *unit)
-{
-	struct iommu_map_entry *entry, *head;
-
-	for (head = unit->tlb_flush_head;; head = entry) {
-		entry = (struct iommu_map_entry *)
-		    atomic_load_acq_ptr((uintptr_t *)&head->tlb_flush_next);
-		if (entry == NULL ||
-		    !dmar_qi_seq_processed(unit, &entry->gseq))
-			break;
-		unit->tlb_flush_head = entry;
-		iommu_gas_free_entry(head);
-		if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
-			iommu_gas_free_region(entry);
-		else
-			iommu_gas_free_space(entry);
-	}
-}
-
 static void
 dmar_qi_task(void *arg, int pending __unused)
 {
 	struct dmar_unit *unit;
 	uint32_t ics;
 
-	unit = arg;
-	dmar_qi_drain_tlb_flush(unit);
+	unit = IOMMU2DMAR(arg);
+	iommu_qi_drain_tlb_flush(DMAR2IOMMU(unit));
 
 	/*
 	 * Request an interrupt on the completion of the next invalidation
@@ -453,16 +324,16 @@ dmar_qi_task(void *arg, int pending __unused)
 		 * Otherwise, such entries will linger until a later entry
 		 * that requests an interrupt is processed.
 		 */
-		dmar_qi_drain_tlb_flush(unit);
+		iommu_qi_drain_tlb_flush(DMAR2IOMMU(unit));
 	}
 
-	if (unit->inv_seq_waiters > 0) {
+	if (unit->x86c.inv_seq_waiters > 0) {
 		/*
 		 * Acquire the DMAR lock so that wakeup() is called only after
 		 * the waiter is sleeping.
 		 */
 		DMAR_LOCK(unit);
-		wakeup(&unit->inv_seq_waiters);
+		wakeup(&unit->x86c.inv_seq_waiters);
 		DMAR_UNLOCK(unit);
 	}
 }
@@ -472,7 +343,7 @@ dmar_init_qi(struct dmar_unit *unit)
 {
 	uint64_t iqa;
 	uint32_t ics;
-	int qi_sz;
+	u_int qi_sz;
 
 	if (!DMAR_HAS_QI(unit) || (unit->hw_cap & DMAR_CAP_CM) != 0)
 		return (0);
@@ -481,34 +352,19 @@ dmar_init_qi(struct dmar_unit *unit)
 	if (!unit->qi_enabled)
 		return (0);
 
-	unit->tlb_flush_head = unit->tlb_flush_tail =
-            iommu_gas_alloc_entry(NULL, 0);
-	TASK_INIT(&unit->qi_task, 0, dmar_qi_task, unit);
-	unit->qi_taskqueue = taskqueue_create_fast("dmarqf", M_WAITOK,
-	    taskqueue_thread_enqueue, &unit->qi_taskqueue);
-	taskqueue_start_threads(&unit->qi_taskqueue, 1, PI_AV,
-	    "dmar%d qi taskq", unit->iommu.unit);
-
-	unit->inv_waitd_gen = 0;
-	unit->inv_waitd_seq = 1;
-
-	qi_sz = DMAR_IQA_QS_DEF;
-	TUNABLE_INT_FETCH("hw.dmar.qi_size", &qi_sz);
-	if (qi_sz > DMAR_IQA_QS_MAX)
-		qi_sz = DMAR_IQA_QS_MAX;
-	unit->inv_queue_size = (1ULL << qi_sz) * PAGE_SIZE;
-	/* Reserve one descriptor to prevent wraparound. */
-	unit->inv_queue_avail = unit->inv_queue_size - DMAR_IQ_DESCR_SZ;
-
-	/* The invalidation queue reads by DMARs are always coherent. */
-	unit->inv_queue = kmem_alloc_contig(unit->inv_queue_size, M_WAITOK |
-	    M_ZERO, 0, iommu_high, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT);
-	unit->inv_waitd_seq_hw_phys = pmap_kextract(
-	    (vm_offset_t)&unit->inv_waitd_seq_hw);
+	unit->x86c.qi_buf_maxsz = DMAR_IQA_QS_MAX;
+	unit->x86c.qi_cmd_sz = DMAR_IQ_DESCR_SZ;
+	iommu_qi_common_init(DMAR2IOMMU(unit), dmar_qi_task);
+	get_x86_iommu()->qi_ensure = dmar_qi_ensure;
+	get_x86_iommu()->qi_emit_wait_descr = dmar_qi_emit_wait_descr;
+	get_x86_iommu()->qi_advance_tail = dmar_qi_advance_tail;
+	get_x86_iommu()->qi_invalidate_emit = dmar_qi_invalidate_emit;
+
+	qi_sz = ilog2(unit->x86c.inv_queue_size / PAGE_SIZE);
 
 	DMAR_LOCK(unit);
 	dmar_write8(unit, DMAR_IQT_REG, 0);
-	iqa = pmap_kextract((uintptr_t)unit->inv_queue);
+	iqa = pmap_kextract((uintptr_t)unit->x86c.inv_queue);
 	iqa |= qi_sz;
 	dmar_write8(unit, DMAR_IQA_REG, iqa);
 	dmar_enable_qi(unit);
@@ -523,35 +379,19 @@ dmar_init_qi(struct dmar_unit *unit)
 	return (0);
 }
 
+static void
+dmar_fini_qi_helper(struct iommu_unit *iommu)
+{
+	dmar_disable_qi_intr(IOMMU2DMAR(iommu));
+	dmar_disable_qi(IOMMU2DMAR(iommu));
+}
+
 void
 dmar_fini_qi(struct dmar_unit *unit)
 {
-	struct iommu_qi_genseq gseq;
-
 	if (!unit->qi_enabled)
 		return;
-	taskqueue_drain(unit->qi_taskqueue, &unit->qi_task);
-	taskqueue_free(unit->qi_taskqueue);
-	unit->qi_taskqueue = NULL;
-
-	DMAR_LOCK(unit);
-	/* quisce */
-	dmar_qi_ensure(unit, 1);
-	dmar_qi_emit_wait_seq(unit, &gseq, true);
-	/* See dmar_qi_invalidate_sync_locked(). */
-	unit->inv_seq_waiters++;
-	dmar_qi_advance_tail(unit);
-	dmar_qi_wait_for_seq(unit, &gseq, false);
-	/* only after the quisce, disable queue */
-	dmar_disable_qi_intr(unit);
-	dmar_disable_qi(unit);
-	KASSERT(unit->inv_seq_waiters == 0,
-	    ("dmar%d: waiters on disabled queue", unit->iommu.unit));
-	DMAR_UNLOCK(unit);
-
-	kmem_free(unit->inv_queue, unit->inv_queue_size);
-	unit->inv_queue = NULL;
-	unit->inv_queue_size = 0;
+	iommu_qi_common_fini(DMAR2IOMMU(unit), dmar_fini_qi_helper);
 	unit->qi_enabled = 0;
 }
 
diff --git a/sys/x86/iommu/iommu_utils.c b/sys/x86/iommu/iommu_utils.c
index ea2c0358e072..571e5a2e65cd 100644
--- a/sys/x86/iommu/iommu_utils.c
+++ b/sys/x86/iommu/iommu_utils.c
@@ -29,7 +29,9 @@
  */
 
 #include <sys/systm.h>
+#include <sys/kernel.h>
 #include <sys/lock.h>
+#include <sys/malloc.h>
 #include <sys/memdesc.h>
 #include <sys/mutex.h>
 #include <sys/sf_buf.h>
@@ -40,8 +42,11 @@
 #include <sys/taskqueue.h>
 #include <sys/tree.h>
 #include <vm/vm.h>
-#include <vm/vm_page.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_map.h>
 #include <vm/vm_object.h>
+#include <vm/vm_page.h>
 #include <dev/pci/pcireg.h>
 #include <machine/atomic.h>
 #include <machine/bus.h>
@@ -251,3 +256,230 @@ iommu_unmap_ioapic_intr(u_int ioapic_id, u_int *cookie)
 {
 	return (x86_iommu->unmap_ioapic_intr(ioapic_id, cookie));
 }
+
+#define	IOMMU2X86C(iommu)	(x86_iommu->get_x86_common(iommu))
+
+static bool
+iommu_qi_seq_processed(struct iommu_unit *unit,
+    const struct iommu_qi_genseq *pseq)
+{
+	struct x86_unit_common *x86c;
+	u_int gen;
+
+	x86c = IOMMU2X86C(unit);
+	gen = x86c->inv_waitd_gen;
+	return (pseq->gen < gen ||
+	    (pseq->gen == gen && pseq->seq <= x86c->inv_waitd_seq_hw));
+}
+
+void
+iommu_qi_emit_wait_seq(struct iommu_unit *unit, struct iommu_qi_genseq *pseq,
+    bool emit_wait)
+{
+	struct x86_unit_common *x86c;
+	struct iommu_qi_genseq gsec;
+	uint32_t seq;
+
+	KASSERT(pseq != NULL, ("wait descriptor with no place for seq"));
+	IOMMU_ASSERT_LOCKED(unit);
+	x86c = IOMMU2X86C(unit);
+
+	if (x86c->inv_waitd_seq == 0xffffffff) {
+		gsec.gen = x86c->inv_waitd_gen;
+		gsec.seq = x86c->inv_waitd_seq;
+		x86_iommu->qi_ensure(unit, 1);
+		x86_iommu->qi_emit_wait_descr(unit, gsec.seq, false,
+		    true, false);
+		x86_iommu->qi_advance_tail(unit);
+		while (!iommu_qi_seq_processed(unit, &gsec))
+			cpu_spinwait();
+		x86c->inv_waitd_gen++;
+		x86c->inv_waitd_seq = 1;
+	}
+	seq = x86c->inv_waitd_seq++;
+	pseq->gen = x86c->inv_waitd_gen;
+	pseq->seq = seq;
+	if (emit_wait) {
+		x86_iommu->qi_ensure(unit, 1);
+		x86_iommu->qi_emit_wait_descr(unit, seq, true, true, false);
+	}
+}
+
+/*
+ * To avoid missed wakeups, callers must increment the unit's waiters count
+ * before advancing the tail past the wait descriptor.
+ */
+void
+iommu_qi_wait_for_seq(struct iommu_unit *unit, const struct iommu_qi_genseq *
+    gseq, bool nowait)
+{
+	struct x86_unit_common *x86c;
+
+	IOMMU_ASSERT_LOCKED(unit);
+	x86c = IOMMU2X86C(unit);
+
+	KASSERT(x86c->inv_seq_waiters > 0, ("%s: no waiters", __func__));
+	while (!iommu_qi_seq_processed(unit, gseq)) {
+		if (cold || nowait) {
+			cpu_spinwait();
+		} else {
+			msleep(&x86c->inv_seq_waiters, &unit->lock, 0,
+			    "dmarse", hz);
+		}
+	}
+	x86c->inv_seq_waiters--;
+}
+
+/*
+ * The caller must not be using the entry's dmamap_link field.
+ */
+void
+iommu_qi_invalidate_locked(struct iommu_domain *domain,
+    struct iommu_map_entry *entry, bool emit_wait)
+{
+	struct iommu_unit *unit;
+	struct x86_unit_common *x86c;
+
+	unit = domain->iommu;
+	x86c = IOMMU2X86C(unit);
+	IOMMU_ASSERT_LOCKED(unit);
+
+	x86_iommu->qi_invalidate_emit(domain, entry->start, entry->end -
+	    entry->start, &entry->gseq, emit_wait);
+
+	/*
+	 * To avoid a data race in dmar_qi_task(), the entry's gseq must be
+	 * initialized before the entry is added to the TLB flush list, and the
+	 * entry must be added to that list before the tail is advanced.  More
+	 * precisely, the tail must not be advanced past the wait descriptor
+	 * that will generate the interrupt that schedules dmar_qi_task() for
+	 * execution before the entry is added to the list.  While an earlier
+	 * call to dmar_qi_ensure() might have advanced the tail, it will not
+	 * advance it past the wait descriptor.
+	 *
+	 * See the definition of struct dmar_unit for more information on
+	 * synchronization.
+	 */
+	entry->tlb_flush_next = NULL;
+	atomic_store_rel_ptr((uintptr_t *)&x86c->tlb_flush_tail->
+	    tlb_flush_next, (uintptr_t)entry);
+	x86c->tlb_flush_tail = entry;
+
+	x86_iommu->qi_advance_tail(unit);
+}
+
+void
+iommu_qi_invalidate_sync(struct iommu_domain *domain, iommu_gaddr_t base,
+    iommu_gaddr_t size, bool cansleep)
+{
+	struct iommu_unit *unit;
+	struct iommu_qi_genseq gseq;
+
+	unit = domain->iommu;
+	IOMMU_LOCK(unit);
+	x86_iommu->qi_invalidate_emit(domain, base, size, &gseq, true);
+
+	/*
+	 * To avoid a missed wakeup in iommu_qi_task(), the unit's
+	 * waiters count must be incremented before the tail is
+	 * advanced.
+	 */
+	IOMMU2X86C(unit)->inv_seq_waiters++;
+
+	x86_iommu->qi_advance_tail(unit);
+	iommu_qi_wait_for_seq(unit, &gseq, !cansleep);
+	IOMMU_UNLOCK(unit);
+}
+
+void
+iommu_qi_drain_tlb_flush(struct iommu_unit *unit)
+{
+	struct x86_unit_common *x86c;
+	struct iommu_map_entry *entry, *head;
+
+	x86c = IOMMU2X86C(unit);
+	for (head = x86c->tlb_flush_head;; head = entry) {
+		entry = (struct iommu_map_entry *)
+		    atomic_load_acq_ptr((uintptr_t *)&head->tlb_flush_next);
+		if (entry == NULL ||
+		    !iommu_qi_seq_processed(unit, &entry->gseq))
+			break;
+		x86c->tlb_flush_head = entry;
+		iommu_gas_free_entry(head);
+		if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
+			iommu_gas_free_region(entry);
+		else
+			iommu_gas_free_space(entry);
+	}
+}
+
+void
+iommu_qi_common_init(struct iommu_unit *unit, task_fn_t qi_task)
+{
+	struct x86_unit_common *x86c;
+	u_int qi_sz;
+
+	x86c = IOMMU2X86C(unit);
+
+	x86c->tlb_flush_head = x86c->tlb_flush_tail =
+            iommu_gas_alloc_entry(NULL, 0);
+	TASK_INIT(&x86c->qi_task, 0, qi_task, unit);
+	x86c->qi_taskqueue = taskqueue_create_fast("iommuqf", M_WAITOK,
+	    taskqueue_thread_enqueue, &x86c->qi_taskqueue);
+	taskqueue_start_threads(&x86c->qi_taskqueue, 1, PI_AV,
+	    "iommu%d qi taskq", unit->unit);
+
+	x86c->inv_waitd_gen = 0;
+	x86c->inv_waitd_seq = 1;
+
+	qi_sz = 3;
+	TUNABLE_INT_FETCH("hw.iommu.qi_size", &qi_sz);
+	if (qi_sz > x86c->qi_buf_maxsz)
+		qi_sz = x86c->qi_buf_maxsz;
+	x86c->inv_queue_size = (1ULL << qi_sz) * PAGE_SIZE;
+	/* Reserve one descriptor to prevent wraparound. */
+	x86c->inv_queue_avail = x86c->inv_queue_size -
+	    x86c->qi_cmd_sz;
+
+	/*
+	 * The invalidation queue reads by DMARs/AMDIOMMUs are always
+	 * coherent.
+	 */
+	x86c->inv_queue = kmem_alloc_contig(x86c->inv_queue_size,
+	    M_WAITOK | M_ZERO, 0, iommu_high, PAGE_SIZE, 0,
+	    VM_MEMATTR_DEFAULT);
+	x86c->inv_waitd_seq_hw_phys = pmap_kextract(
+	    (vm_offset_t)&x86c->inv_waitd_seq_hw);
+}
+
+void
+iommu_qi_common_fini(struct iommu_unit *unit, void (*disable_qi)(
+    struct iommu_unit *))
+{
+	struct x86_unit_common *x86c;
+	struct iommu_qi_genseq gseq;
+
+	x86c = IOMMU2X86C(unit);
+
+	taskqueue_drain(x86c->qi_taskqueue, &x86c->qi_task);
+	taskqueue_free(x86c->qi_taskqueue);
+	x86c->qi_taskqueue = NULL;
+
+	IOMMU_LOCK(unit);
+	/* quisce */
+	x86_iommu->qi_ensure(unit, 1);
+	iommu_qi_emit_wait_seq(unit, &gseq, true);
+	/* See iommu_qi_invalidate_locked(). */
+	x86c->inv_seq_waiters++;
+	x86_iommu->qi_advance_tail(unit);
+	iommu_qi_wait_for_seq(unit, &gseq, false);
+	/* only after the quisce, disable queue */
+	disable_qi(unit);
+	KASSERT(x86c->inv_seq_waiters == 0,
+	    ("iommu%d: waiters on disabled queue", unit->unit));
+	IOMMU_UNLOCK(unit);
+
+	kmem_free(x86c->inv_queue, x86c->inv_queue_size);
+	x86c->inv_queue = NULL;
+	x86c->inv_queue_size = 0;
+}
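
iommu_qi_seq_processed() and iommu_qi_emit_wait_seq() above implement the wait-descriptor bookkeeping: each wait gets a (generation, sequence) pair, and when the 32-bit sequence counter is about to wrap, the code drains the queue and then bumps the generation so the pairs stay comparable. A standalone sketch of just that comparison and wraparound logic, not part of the commit, with invented names and without the drain spin the kernel performs, might read:

/*
 * Illustrative sketch, not part of the commit: a userspace model of the
 * (generation, sequence) bookkeeping used by iommu_qi_emit_wait_seq() and
 * iommu_qi_seq_processed() above.  The drain spin that precedes the
 * generation bump in the real code is omitted here.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct genseq {
	uint32_t gen;
	uint32_t seq;
};

struct qi_state {
	uint32_t waitd_gen;	/* current generation */
	uint32_t waitd_seq;	/* next sequence number to hand out */
	uint32_t waitd_seq_hw;	/* last sequence the "hardware" completed */
};

/*
 * Mirrors iommu_qi_seq_processed(): done if the wait belongs to an older
 * generation, or to the current one and the hardware's completion counter
 * has reached its sequence number.
 */
static bool
seq_processed(const struct qi_state *s, const struct genseq *p)
{
	return (p->gen < s->waitd_gen ||
	    (p->gen == s->waitd_gen && p->seq <= s->waitd_seq_hw));
}

/* Allocation side of iommu_qi_emit_wait_seq(), minus the queue drain. */
static void
alloc_wait_seq(struct qi_state *s, struct genseq *p)
{
	if (s->waitd_seq == 0xffffffff) {
		s->waitd_gen++;
		s->waitd_seq = 1;
	}
	p->gen = s->waitd_gen;
	p->seq = s->waitd_seq++;
}

int
main(void)
{
	struct qi_state s = { .waitd_gen = 0, .waitd_seq = 0xfffffffe };
	struct genseq a, b;

	alloc_wait_seq(&s, &a);		/* last sequence of generation 0 */
	alloc_wait_seq(&s, &b);		/* triggers the generation bump */
	s.waitd_seq_hw = 1;		/* pretend b's wait completed */
	printf("a done: %d, b done: %d\n", (int)seq_processed(&s, &a),
	    (int)seq_processed(&s, &b));
	return (0);
}
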
diff --git a/sys/x86/iommu/x86_iommu.h b/sys/x86/iommu/x86_iommu.h
index 8c908964acd0..eb1bbafbeb77 100644
--- a/sys/x86/iommu/x86_iommu.h
+++ b/sys/x86/iommu/x86_iommu.h
@@ -59,7 +59,18 @@ extern int iommu_tbl_pagecnt;
 SYSCTL_DECL(_hw_iommu);
 SYSCTL_DECL(_hw_iommu_dmar);
 
+struct x86_unit_common;
+
 struct x86_iommu {
+	struct x86_unit_common *(*get_x86_common)(struct
+	    iommu_unit *iommu);
+	void (*qi_ensure)(struct iommu_unit *unit, int descr_count);
+	void (*qi_emit_wait_descr)(struct iommu_unit *unit, uint32_t seq,
+	    bool, bool, bool);
+	void (*qi_advance_tail)(struct iommu_unit *unit);
+	void (*qi_invalidate_emit)(struct iommu_domain *idomain,
+	    iommu_gaddr_t base, iommu_gaddr_t size, struct iommu_qi_genseq *
+	    pseq, bool emit_wait);
 	void (*domain_unload_entry)(struct iommu_map_entry *entry, bool free,
 	    bool cansleep);
 	void (*domain_unload)(struct iommu_domain *iodom,
@@ -82,4 +93,65 @@ struct x86_iommu {
 void set_x86_iommu(struct x86_iommu *);
 struct x86_iommu *get_x86_iommu(void);
 
+struct x86_unit_common {
+	uint32_t qi_buf_maxsz;
+	uint32_t qi_cmd_sz;
+
+	char *inv_queue;
+	vm_size_t inv_queue_size;
+	uint32_t inv_queue_avail;
+	uint32_t inv_queue_tail;
+	volatile uint32_t inv_waitd_seq_hw; /* hw writes there on wait
+					       descr completion */
*** 52 LINES SKIPPED ***